# Clear environment of variables and functions
# NOTE(review): rm(list = ls()) wipes whatever workspace the caller has;
# prefer running this script in a fresh R session instead of clearing state.
rm(list = ls(all = TRUE))
# Clear environment of packages: detach and unload every non-base package
# currently attached (sessionInfo()$otherPkgs is NULL when none are)
if(is.null(sessionInfo()$otherPkgs) == FALSE)lapply(paste("package:", names(sessionInfo()$otherPkgs), sep=""), detach, character.only = TRUE, unload = TRUE)
library(tidyverse) # For wrangling, subseting, and ploting
library(lubridate) # For working with dates
library(stringr) # Used to manipulate strings
library(GGally) # For ggpairs, a detailed correlation graphic
library(gridExtra) # For grouping graphs
library(janitor) # Tidy cross tabs
library(MultinomialCI) # To calculate multinomial confidence intervals
library(htmlTable) # To format html tables
library(psych)
library(flexdashboard)
library(here)
library(plotly)
library(kableExtra)
library(scales)
library(cowplot)
# Load data (expects Seattle_Reign.csv in the working directory;
# read.csv() converts literal "NA" strings to real NA by default)
seattle_reign <- read.csv("Seattle_Reign.csv")
# Keep an untouched copy of the raw survey data so the cleaning pipeline
# below can be re-run without re-reading the file
seattle_reign_raw_data <- seattle_reign
# Review data structure
#str(seattle_reign)
#summary(seattle_reign)
# Build the analysis data set from the raw survey responses.
# Variable-name prefixes map the survey factors from the data dictionary:
#   factor_1 att_   = attendance      factor_2 sales_ = sales
#   factor_3 media_ = media           factor_4 mer_   = merchandise
#   factor_5 dem_   = demographics    factor_6 sat_   = satisfaction
seattle_reign <- seattle_reign_raw_data %>%
  mutate(
    factor1_attend = as.factor(Attend1),
    # Year the respondent first attended a Reign match (Attend2)
    att_first_attend = case_when(
      Attend2 == 2013 ~ "Attended_2013",
      Attend2 == 2014 ~ "Attended_2014",
      Attend2 == 2015 ~ "Attended_2015",
      Attend2 == 2016 ~ "Attended_2016",
      TRUE ~ "NotAttended"
    ),
    att_first_attend = as.factor(att_first_attend),
    # Attended a match in 2016? "Yes" -> "1", everything else -> "0"
    att_attend_2016 = case_when(Attend3 == "Yes" ~ "1", TRUE ~ "0"),
    att_attend_2016 = as.factor(att_attend_2016),
    # Self-reported fan interest level.
    # NOTE(review): "Die_heart_fan" is presumably meant to be "die-hard";
    # kept unchanged because it is a level label shown in published output.
    att_interest = case_when(
      str_detect(Interest, "I live and die with this team") ~ "Die_heart_fan",
      str_detect(Interest, "I consider myself to be a loyal fan of this team") ~ "Loyal_fan",
      str_detect(Interest, "I consider myself to be a moderate fan of this team") ~ "Moderate_fan",
      str_detect(Interest, "I consider myself to be a low-level fan of this team") ~ "Low_level_fan",
      TRUE ~ "Less_interest"
    ),
    att_interest = as.factor(att_interest),
    # Company at the game; the first matching Gamewit* flag wins
    att_gamewith = case_when(
      Gamewit1 == 1 ~ "Family",
      Gamewit2 == 1 ~ "Friends",
      Gamewit3 == 1 ~ "Business Associates",
      Gamewit4 == 1 ~ "I attend games by myself",
      TRUE ~ "NotApplicable"
    ),
    att_gamewith = as.factor(att_gamewith),
    att_distance = Travel5,               # distance travelled to attend
    att_travel_mode = as.factor(Travel6),
    # Effect of promotions on attendance (Promo1 free text)
    att_promo_effect = case_when(
      str_detect(Promo1, "Has no influence on my attendance") ~ "No influence",
      str_detect(Promo1, "Has a positive influence on my attendance") ~ "Positive influence",
      str_detect(Promo1, "Had a negative influence on my attendance") ~ "Negative influence",
      TRUE ~ "Unknown"
    ),
    att_promo_effect = as.factor(att_promo_effect),
    # Likert recode: "1" = agree side, "2" = disagree side, "0" = other.
    # NOTE(review): the str_detect() patterns are case-sensitive and order
    # dependent ("Agree" is tested before "Disagree"); verify against the
    # exact wording of the survey responses.
    att_player_fan1 = case_when(
      str_detect(IDplay1, "Agree") ~ "1",
      str_detect(IDplay1, "Disagree") ~ "2",
      str_detect(IDplay1, "Somewhat disagree") ~ "2",
      str_detect(IDplay1, "Somewhat agree") ~ "1",
      str_detect(IDplay1, "Strongly agree") ~ "1",
      str_detect(IDplay1, "Strongly Disagree") ~ "2",
      TRUE ~ "0"
    ),
    att_attend_2017_1 = case_when(
      str_detect(Attend17, "Agree") ~ "1",
      str_detect(Attend17, "Disagree") ~ "2",
      str_detect(Attend17, "Somewhat disagree") ~ "2",
      str_detect(Attend17, "Somewhat agree") ~ "1",
      str_detect(Attend17, "Strongly agree") ~ "1",
      str_detect(Attend17, "Strongly Disagree") ~ "2",
      TRUE ~ "0"
    ),
    att_player_fan = case_when(
      att_player_fan1 == 1 ~ "Player_Fan",
      att_player_fan1 == 2 ~ "Not_a_Player_Fan",
      att_player_fan1 == 0 ~ "Neutral"
    ),
    # Intends to attend in 2017: agree side -> "1", everything else -> "0"
    att_attend_2017 = case_when(
      att_attend_2017_1 == 1 ~ "1",
      att_attend_2017_1 == 2 ~ "0",
      att_attend_2017_1 == 0 ~ "0"
    ),
    att_player_fan = as.factor(att_player_fan),
    att_attend_2017 = as.factor(att_attend_2017),
    # Most preferred match time (GameTim1 codes 1-7)
    att_most_pref_time = case_when(
      GameTim1 == 1 ~ "Wed Evenings",
      GameTim1 == 2 ~ "Friday Evenings",
      GameTim1 == 3 ~ "Saturday Afternoons 1pm",
      GameTim1 == 4 ~ "Saturday Afternoons 4pm",
      GameTim1 == 5 ~ "Saturday Evenings",
      GameTim1 == 6 ~ "Sunday Afternoons 4pm",
      GameTim1 == 7 ~ "Sunday Evenings",
      TRUE ~ "NoPreference"
    ),
    att_most_pref_time = as.factor(att_most_pref_time),
    # Least preferred match time (GameTim7, same 1-7 coding)
    att_least_preferred_timing = case_when(
      GameTim7 == 1 ~ "Wed Evenings",
      GameTim7 == 2 ~ "Friday Evenings",
      GameTim7 == 3 ~ "Saturday Afternoons 1pm",
      GameTim7 == 4 ~ "Saturday Afternoons 4pm",
      GameTim7 == 5 ~ "Saturday Evenings",
      GameTim7 == 6 ~ "Sunday Afternoons 4pm",
      GameTim7 == 7 ~ "Sunday Evenings",
      TRUE ~ "NoPreference"
    ),
    att_least_preferred_timing = as.factor(att_least_preferred_timing),
    sales_ticket_type = as.factor(lasttype),
    sales_numofmatch_2017 = Gamepln1,     # matches planned for 2017
    # 2017 ticket-type intention; rows matching no Gampln2* flag become NA
    # (there is deliberately no TRUE ~ default) and are removed by the
    # drop_na() at the end of the pipeline
    sales_tkt_type_2017 = case_when(
      Gampln2a == 1 ~ "Season Ticket",
      Gampln2b == 1 ~ "5 Match Pack",
      Gampln2c == 1 ~ "3 Match Pack",
      Gampln2d == 1 ~ "Single Match",
      Gampln2e == 1 ~ "Pitchside Table",
      Gampln2f == 1 ~ "Undecided"
    ),
    sales_tkt_type_2017 = as.factor(sales_tkt_type_2017),
    # Web channel used, taken from whichever Media8_* slot carries the code.
    # NOTE(review): "Instragram" is a typo for "Instagram" but is a factor
    # level that appears in saved output, so it is left unchanged.
    media_web = case_when(
      Media8_1 == 1 | Media8_2 == 1 | Media8_3 == 1 | Media8_4 == 1 | Media8_5 == 1 ~ "Website",
      Media8_1 == 2 | Media8_2 == 2 | Media8_3 == 2 | Media8_4 == 2 | Media8_5 == 2 ~ "Facebook",
      Media8_1 == 3 | Media8_2 == 3 | Media8_3 == 3 | Media8_4 == 3 | Media8_5 == 3 ~ "Twitter",
      Media8_1 == 4 | Media8_2 == 4 | Media8_3 == 4 | Media8_4 == 4 | Media8_5 == 4 ~ "Instragram",
      Media8_1 == 5 | Media8_2 == 5 | Media8_3 == 5 | Media8_4 == 5 | Media8_5 == 5 ~ "Youtube",
      TRUE ~ "NotAvailable"
    ),
    media_web = as.factor(media_web),
    media_season_newspaper = Media9,      # newspaper consumption last season
    media_season_tv = Media10,            # TV consumption last season
    # Influence of each advertising channel on attendance
    media_ads1 = case_when(
      str_detect(Media1, "Has no influence on my attendance") ~ "No_inf_newspaper",
      str_detect(Media1, "Has a positive influence on my attendance") ~ "Newspaper_pos",
      str_detect(Media1, "Had a negative influence on my attendance") ~ "Newspaper_neg",
      TRUE ~ "Unknown"
    ),
    media_ads3 = case_when(
      str_detect(Media3, "Has no influence on my attendance") ~ "No_inf_FCBillboard",
      str_detect(Media3, "Has a positive influence on my attendance") ~ "FCBillboard_pos",
      str_detect(Media3, "Had a negative influence on my attendance") ~ "FCBillboard_neg",
      TRUE ~ "Unknown"
    ),
    media_ads4 = case_when(
      str_detect(Media4, "Has no influence on my attendance") ~ "No_infl_radio",
      str_detect(Media4, "Has a positive influence on my attendance") ~ "Radio_positive",
      str_detect(Media4, "Had a negative influence on my attendance") ~ "Radio_negative",
      TRUE ~ "Unknown"
    ),
    media_ads6 = case_when(
      str_detect(Media6, "Has no influence on my attendance") ~ "No_infl_internet",
      str_detect(Media6, "Has a positive influence on my attendance") ~ "internet_positive",
      str_detect(Media6, "Had a negative influence on my attendance") ~ "internet_negative",
      TRUE ~ "Unknown"
    ),
    # First channel reported as a positive influence; NA when none is
    media_ads = case_when(
      media_ads1 == "Newspaper_pos" ~ "Newspaper_ads",
      media_ads3 == "FCBillboard_pos" ~ "FCBillboard_ads",
      media_ads4 == "Radio_positive" ~ "Radio_ads",
      media_ads6 == "internet_positive" ~ "Internet_ads"
    ),
    media_ads = as.factor(media_ads),
    media_ads1 = as.factor(media_ads1),
    media_ads3 = as.factor(media_ads3),
    media_ads4 = as.factor(media_ads4),
    media_ads6 = as.factor(media_ads6),
    # Merchandise-buying Likert items, recoded "1"/"2"/"0" as above
    mer_buy_online1 = case_when(
      str_detect(Buyonl1, "Agree") ~ "1",
      str_detect(Buyonl1, "Disagree") ~ "2",
      str_detect(Buyonl1, "Somewhat disagree") ~ "2",
      str_detect(Buyonl1, "Somewhat agree") ~ "1",
      str_detect(Buyonl1, "Strongly agree") ~ "1",
      str_detect(Buyonl1, "Strongly Disagree") ~ "2",
      TRUE ~ "0"
    ),
    mer_buy_online1 = as.factor(mer_buy_online1),
    mer_buy_match1 = case_when(
      str_detect(BuyMat1, "Agree") ~ "1",
      str_detect(BuyMat1, "Disagree") ~ "2",
      str_detect(BuyMat1, "Somewhat disagree") ~ "2",
      str_detect(BuyMat1, "Somewhat agree") ~ "1",
      str_detect(BuyMat1, "Strongly agree") ~ "1",
      str_detect(BuyMat1, "Strongly Disagree") ~ "2",
      TRUE ~ "0"
    ),
    mer_buy_match1 = as.factor(mer_buy_match1),
    mer_buy_store1 = case_when(
      str_detect(BuyStor1, "Agree") ~ "1",
      str_detect(BuyStor1, "Disagree") ~ "2",
      str_detect(BuyStor1, "Somewhat disagree") ~ "2",
      str_detect(BuyStor1, "Somewhat agree") ~ "1",
      str_detect(BuyStor1, "Strongly agree") ~ "1",
      str_detect(BuyStor1, "Strongly Disagree") ~ "2",
      TRUE ~ "0"
    ),
    mer_buy_store1 = as.factor(mer_buy_store1),
    mer_buy_online = case_when(
      mer_buy_online1 == 1 ~ "Buy_Online",
      mer_buy_online1 == 2 ~ "DoNot_Buy_Online",
      mer_buy_online1 == 0 ~ "Neutral"
    ),
    mer_buy_match = case_when(
      mer_buy_match1 == 1 ~ "Buy_atMatch",
      mer_buy_match1 == 2 ~ "DoNot_Buy_atMatch",
      mer_buy_match1 == 0 ~ "Neutral"
    ),
    mer_buy_store = case_when(
      mer_buy_store1 == 1 ~ "Buy_atStore",
      mer_buy_store1 == 2 ~ "DoNot_Buy_atStore",
      mer_buy_store1 == 0 ~ "Neutral"
    ),
    mer_buy_online = as.factor(mer_buy_online),
    mer_buy_match = as.factor(mer_buy_match),
    mer_buy_store = as.factor(mer_buy_store),
    # Preferred buying channel; online wins ties, then match, then store
    mer_buy_pref = case_when(
      mer_buy_online == "Buy_Online" ~ "Online",
      mer_buy_match == "Buy_atMatch" ~ "During Match",
      mer_buy_store == "Buy_atStore" ~ "At Store",
      TRUE ~ "Neutral"
    ),
    mer_buy_pref = as.factor(mer_buy_pref),
    # Sponsor purchased from; the first "Yes" among the Purchs* columns wins
    mer_purchase_sponsors = case_when(
      Purchs1 == "Yes" ~ "Carter Subaru",
      Purchs2 == "Yes" ~ "Microsoft",
      Purchs3 == "Yes" ~ "Pepsi",
      Purchs4 == "Yes" ~ "BECU",
      Purchs9 == "Yes" ~ "Kraken Congee",
      Purchs10 == "Yes" ~ "Ruffneck Scarves",
      TRUE ~ "Not sure"
    ),
    mer_purchase_sponsors = as.factor(mer_purchase_sponsors),
    # Reign merchandise type of interest (Shirt1-4 flags)
    mer_reign = case_when(
      Shirt1 == 1 ~ "Short_Sleeve_Shirt",
      Shirt2 == 1 ~ "Long_Sleeve_Shirt",
      Shirt3 == 1 ~ "Jackets",
      Shirt4 == 1 ~ "Accessories",
      TRUE ~ "No preferences"
    ),
    mer_reign = as.factor(mer_reign),
    dem_gender = as.factor(Gender),
    dem_sexualOrient = as.factor(SexOrient),
    dem_race = as.factor(Race),
    # Other Seattle-area sporting events attended (first flag set wins).
    # NOTE(review): SeaEvnt5 is never referenced; confirm its absence from
    # the data dictionary is deliberate.
    att_seaEvent = case_when(
      SeaEvnt1 == 1 ~ "Seattle Storm",
      SeaEvnt2 == 1 ~ "Seattle Seahawks",
      SeaEvnt3 == 1 ~ "Seattle Sounders",
      SeaEvnt4 == 1 ~ "Seattle Mariners",
      SeaEvnt6 == 1 ~ "Sounders Women",
      SeaEvnt7 == 1 ~ "Seattle Majestics",
      SeaEvnt8 == 1 ~ "Tacoma Stars",
      SeaEvnt9 == 1 ~ "Rat City Rollergirls",
      TRUE ~ "NoAnswer"
    ),
    att_seaEvent = as.factor(att_seaEvent),
    dem_marital_status = Marital,
    dem_education = Educat,
    # Game-day consumption satisfaction: "S" satisfied / "D" dissatisfied /
    # "N" neutral (only the "Very ..." extremes are classified)
    sat_price_food = case_when(
      str_detect(Consat4, "Very Satisfied") ~ "S",
      str_detect(Consat4, "Very Dissatisfied") ~ "D",
      TRUE ~ "N"
    ),
    sat_quality = case_when(
      str_detect(Consat1, "Very Satisfied") ~ "S",
      str_detect(Consat1, "Very Dissatisfied") ~ "D",
      TRUE ~ "N"
    ),
    sat_variety = case_when(
      str_detect(Consat2, "Very Satisfied") ~ "S",
      str_detect(Consat2, "Very Dissatisfied") ~ "D",
      TRUE ~ "N"
    ),
    sat_price_drink = case_when(
      str_detect(Consat5, "Very Satisfied") ~ "S",
      str_detect(Consat5, "Very Dissatisfied") ~ "D",
      TRUE ~ "N"
    ),
    sat_beer = case_when(
      str_detect(Beersat1, "Very Satisfied") ~ "S",
      str_detect(Beersat1, "Very Dissatisfied") ~ "D",
      TRUE ~ "N"
    ),
    # Single game-day verdict; satisfied aspects take priority over
    # dissatisfied ones because they are listed first in the case_when()
    sat_gameday = case_when(
      sat_price_food == "S" ~ "Satisfactory food price",
      sat_quality == "S" ~ "Satisfactory quality",
      sat_price_drink == "S" ~ "Satisfactory drink price",
      sat_variety == "S" ~ "Satisfied with variety",
      sat_beer == "S" ~ "Satisfactory beer service",
      sat_price_food == "D" ~ "Dissatisfying food price",
      sat_quality == "D" ~ "Dissatisfying quality",
      sat_price_drink == "D" ~ "Dissatisfying drink price",
      sat_variety == "D" ~ "Dissatisfied with variety",
      sat_beer == "D" ~ "Dissatisfying beer service",
      TRUE ~ "Neutral"
    ),
    # Perceived cost of attending (Cost3 Likert): agree side -> "Costly"
    cost1 = case_when(
      str_detect(Cost3, "Agree") ~ "A",
      str_detect(Cost3, "Disagree") ~ "D",
      str_detect(Cost3, "Somewhat disagree") ~ "D",
      str_detect(Cost3, "Somewhat agree") ~ "A",
      str_detect(Cost3, "Strongly agree") ~ "A",
      str_detect(Cost3, "Strongly Disagree") ~ "D",
      TRUE ~ "N"
    ),
    cost = if_else(cost1 == "A", "Costly", "Not Costly"),
    cost = as.factor(cost),
    sat_gameday = as.factor(sat_gameday),
    # Performance satisfaction items ("A" agree / "D" disagree / "N" neutral)
    sat_skill = case_when(
      str_detect(Skill2, "Agree") ~ "A",
      str_detect(Skill2, "Disagree") ~ "D",
      str_detect(Skill2, "Somewhat disagree") ~ "D",
      str_detect(Skill2, "Somewhat agree") ~ "A",
      str_detect(Skill2, "Strongly agree") ~ "A",
      str_detect(Skill2, "Strongly Disagree") ~ "D",
      TRUE ~ "N"
    ),
    sat_success = case_when(
      str_detect(Success2, "Agree") ~ "A",
      str_detect(Success2, "Disagree") ~ "D",
      str_detect(Success2, "Somewhat disagree") ~ "D",
      str_detect(Success2, "Somewhat agree") ~ "A",
      str_detect(Success2, "Strongly agree") ~ "A",
      str_detect(Success2, "Strongly Disagree") ~ "D",
      TRUE ~ "N"
    ),
    sat_drama = case_when(
      str_detect(Drama4, "Agree") ~ "A",
      str_detect(Drama4, "Disagree") ~ "D",
      str_detect(Drama4, "Somewhat disagree") ~ "D",
      str_detect(Drama4, "Somewhat agree") ~ "A",
      str_detect(Drama4, "Strongly agree") ~ "A",
      str_detect(Drama4, "Strongly Disagree") ~ "D",
      TRUE ~ "N"
    ),
    sat_cost = case_when(
      str_detect(Cost3, "Agree") ~ "A",
      str_detect(Cost3, "Disagree") ~ "D",
      str_detect(Cost3, "Somewhat disagree") ~ "D",
      str_detect(Cost3, "Somewhat agree") ~ "A",
      str_detect(Cost3, "Strongly agree") ~ "A",
      str_detect(Cost3, "Strongly Disagree") ~ "D",
      TRUE ~ "N"
    ),
    # Overall performance-satisfaction verdict.
    # NOTE(review): "TNot Satisfied_success" looks like a typo for
    # "Not Satisfied_success"; kept as-is because the label is a factor
    # level that appears in previously published summary output.
    satisfacn_perf = case_when(
      sat_skill == "A" ~ "Satisfied_with_skill",
      sat_success == "A" ~ "Satisfied_with_success",
      sat_drama == "A" ~ "Satisfied_with_drama",
      sat_cost == "A" ~ "Satisfied_with_cost",
      sat_skill == "D" ~ "Not Satisfied_skill",
      sat_success == "D" ~ "TNot Satisfied_success",
      sat_drama == "D" ~ "Not Satisfied_drama",
      sat_cost == "D" ~ "Not Satisfied_cost",
      TRUE ~ "Not Satisfied"
    ),
    satisfacn_perf = as.factor(satisfacn_perf)
  ) %>%
  # Drop rows missing the key quantitative fields. The original compared
  # against the string "NA" (`x != "NA"`), which only worked by accident:
  # comparing a real NA yields NA and filter() drops NA conditions.
  # !is.na() states the intent directly.
  filter(!is.na(att_distance)) %>%
  filter(!is.na(sales_numofmatch_2017)) %>%
  filter(!is.na(media_season_tv)) %>%
  filter(!is.na(media_season_newspaper)) %>%
  # filter(!is.na(media_ads)) %>%
  rename(sales_exp_self2016 = SpndlstS,   # 2016 spend on self
         sales_exp_oth2016 = SpndlstO) %>% # 2016 spend on others
  filter(!is.na(sales_exp_self2016)) %>%
  filter(!is.na(sales_exp_oth2016)) %>%
  # Keep only the modelling/EDA columns (duplicate att_gamewith removed;
  # column order preserved from the original select())
  select(factor1_attend, att_first_attend, att_interest, att_gamewith,
         att_travel_mode, att_distance, att_promo_effect, att_player_fan,
         att_seaEvent, att_most_pref_time, att_least_preferred_timing,
         att_attend_2017, att_attend_2016,
         sales_ticket_type, sales_exp_self2016, sales_exp_oth2016,
         sales_numofmatch_2017, sales_tkt_type_2017,
         media_web, media_season_newspaper, media_season_tv,
         mer_buy_pref, cost, mer_purchase_sponsors, mer_reign,
         dem_gender, dem_sexualOrient, dem_marital_status, dem_education,
         sat_gameday, satisfacn_perf) %>%
  drop_na()
#Examine the first 10 observations to make sure data set looks ok
#head(seattle_reign,10)
# Descriptive statistics
#summary(seattle_reign)
# Review data structures
#str(seattle_reign)
# Colour-blind-friendly categorical palette (Okabe-Ito hues, no grey)
cbPalette <- c(
  "#E69F00", # orange
  "#56B4E9", # sky blue
  "#009E73", # bluish green
  "#F0E442", # yellow
  "#0072B2", # blue
  "#D55E00", # vermillion
  "#CC79A7"  # reddish purple
)
# Trim the cleaned survey data down to the columns used in the EDA below
# (same columns, same order as before; all_of() errors loudly if any
# expected column is missing)
sea_reign_data <- seattle_reign %>%
  select(all_of(c(
    "att_first_attend", "att_interest", "att_distance",
    "att_promo_effect", "att_seaEvent", "att_most_pref_time",
    "att_attend_2017", "att_attend_2016", "att_gamewith",
    "sales_ticket_type", "sales_exp_self2016",
    "sales_exp_oth2016", "sales_numofmatch_2017", "sales_tkt_type_2017",
    "media_web", "cost",
    "media_season_newspaper", "media_season_tv",
    "mer_buy_pref",
    "mer_purchase_sponsors", "mer_reign", "dem_gender", "dem_sexualOrient",
    "dem_marital_status", "dem_education", "sat_gameday", "satisfacn_perf"
  )))
summary(sea_reign_data)
att_first_attend att_interest att_distance att_promo_effect att_seaEvent
Attended_2013:109 Die_heart_fan: 29 Min. : 0.0 Negative influence: 1 NoAnswer : 56
Attended_2014:113 Less_interest: 1 1st Qu.: 5.0 No influence : 85 Rat City Rollergirls: 4
Attended_2015:123 Low_level_fan: 25 Median :10.0 Positive influence:162 Seattle Mariners : 27
Attended_2016: 69 Loyal_fan :241 Mean :15.8 Unknown :166 Seattle Seahawks : 59
NotAttended : 0 Moderate_fan :118 3rd Qu.:20.0 Seattle Sounders :159
Max. :80.0 Seattle Storm :108
Sounders Women : 1
att_most_pref_time att_attend_2017 att_attend_2016 att_gamewith sales_ticket_type
Sunday Evenings :248 0: 41 0: 58 Family :306 3 Match Pack : 5
Sunday Afternoons 4pm : 54 1:373 1:356 Friends : 90 5 Match Pack : 17
NoPreference : 35 I attend games by myself: 18 I don't know : 26
Saturday Afternoons 4pm: 21 NotApplicable : 0 Pitchside Table: 2
Wed Evenings : 21 Season Ticket :134
Saturday Evenings : 17 Single Match :230
(Other) : 18
sales_exp_self2016 sales_exp_oth2016 sales_numofmatch_2017 sales_tkt_type_2017 media_web cost
Min. : 0.00 Min. : 0.00 Min. : 0.000 3 Match Pack : 21 Facebook : 47 Costly :344
1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 2.000 5 Match Pack : 30 Instragram : 5 Not Costly: 70
Median : 25.00 Median : 20.00 Median : 5.000 Pitchside Table: 4 NotAvailable: 56
Mean : 43.56 Mean : 39.47 Mean : 5.804 Season Ticket :134 Twitter : 16
3rd Qu.: 60.00 3rd Qu.: 60.00 3rd Qu.:10.000 Single Match :186 Website :275
Max. :250.00 Max. :200.00 Max. :16.000 Undecided : 39 Youtube : 15
media_season_newspaper media_season_tv mer_buy_pref mer_purchase_sponsors mer_reign
Min. :0.000 Min. : 0.000 At Store : 26 BECU : 45 Accessories : 22
1st Qu.:0.000 1st Qu.: 0.000 During Match:116 Carter Subaru : 20 Jackets : 16
Median :1.000 Median : 1.000 Neutral :130 Kraken Congee : 8 Long_Sleeve_Shirt : 47
Mean :1.401 Mean : 1.874 Online :142 Microsoft :154 No preferences : 73
3rd Qu.:2.000 3rd Qu.: 3.000 Not sure : 97 Short_Sleeve_Shirt:256
Max. :5.000 Max. :10.000 Pepsi : 80
Ruffneck Scarves: 10
dem_gender dem_sexualOrient dem_marital_status dem_education
Female :252 Bisexual : 20 Married/Partnered:286 College Graduate :182
Male :157 Gay/Lesbian : 76 Other : 3 Graduate Degree :177
Prefer not to answer: 5 Heterosexual :297 Single :125 High School Graduate: 7
Other : 8 Other : 2
Prefer not to answer: 13 Some College : 46
sat_gameday satisfacn_perf
Neutral :325 Not Satisfied : 0
Dissatisfying beer service: 25 Not Satisfied_skill : 2
Satisfactory food price : 22 Satisfied_with_cost : 3
Satisfactory quality : 13 Satisfied_with_drama : 6
Satisfactory beer service : 7 Satisfied_with_skill :393
Dissatisfying food price : 6 Satisfied_with_success: 10
(Other) : 16
Questions to follow up on in detailed EDA
1. How does attendance vary when people travelled longer distances to watch a match?
2. How does attendance vary between people who attended a previous match and those who have not?
3. How do media channels such as the website and Facebook affect the number of people likely to attend a match in 2017? 4. What are the purchasing habits of people who have attended previous matches? 5. How was the overall game-day experience of people who attended matches in the past? 6. What is the impact of sponsors on merchandise sales at the store vs online vs during a match?
# Select the columns needed by the flexdashboard and export them.
sea_flexdash <- sea_reign_data %>%
  select(att_promo_effect, att_most_pref_time, sales_exp_self2016, att_attend_2016,
         sales_exp_oth2016, sales_numofmatch_2017, sales_tkt_type_2017, mer_buy_pref,
         att_attend_2017, mer_reign, dem_gender, dem_education)
# Write the extract relative to the project root via here() instead of the
# original machine-specific absolute path, so the script runs anywhere.
# NOTE(review): write.csv() emits a row-name column by default; add
# row.names = FALSE if the dashboard does not expect it.
write.csv(sea_flexdash, here("sea_flexdash.csv"))
summary(sea_flexdash)
att_promo_effect att_most_pref_time sales_exp_self2016 att_attend_2016 sales_exp_oth2016
Negative influence: 1 Sunday Evenings :248 Min. : 0.00 0: 58 Min. : 0.00
No influence : 85 Sunday Afternoons 4pm : 54 1st Qu.: 0.00 1:356 1st Qu.: 0.00
Positive influence:162 NoPreference : 35 Median : 25.00 Median : 20.00
Unknown :166 Saturday Afternoons 4pm: 21 Mean : 43.56 Mean : 39.47
Wed Evenings : 21 3rd Qu.: 60.00 3rd Qu.: 60.00
Saturday Evenings : 17 Max. :250.00 Max. :200.00
(Other) : 18
sales_numofmatch_2017 sales_tkt_type_2017 mer_buy_pref att_attend_2017 mer_reign
Min. : 0.000 3 Match Pack : 21 At Store : 26 0: 41 Accessories : 22
1st Qu.: 2.000 5 Match Pack : 30 During Match:116 1:373 Jackets : 16
Median : 5.000 Pitchside Table: 4 Neutral :130 Long_Sleeve_Shirt : 47
Mean : 5.804 Season Ticket :134 Online :142 No preferences : 73
3rd Qu.:10.000 Single Match :186 Short_Sleeve_Shirt:256
Max. :16.000 Undecided : 39
dem_gender dem_education
Female :252 College Graduate :182
Male :157 Graduate Degree :177
Prefer not to answer: 5 High School Graduate: 7
Other : 2
Some College : 46
# Helper: per-group count, share of all respondents (%), plus any extra
# group-level summaries supplied by the caller. `group_col` is embraced
# with {{ }} so a bare column name can be passed (tidy evaluation); the
# extra summaries in `...` are forwarded straight into summarise().
summarise_group <- function(df, group_col, ...) {
  df %>%
    group_by({{ group_col }}) %>%
    summarise(count = n(),
              percent = count / nrow(df) * 100,  # share of all rows
              ...)
}

# Attendance summaries: counts, shares, and mean distance travelled
attendance_first <- summarise_group(sea_reign_data, att_first_attend,
                                    avg_distance = mean(att_distance))
attendance_first
attendance_interest <- summarise_group(sea_reign_data, att_interest,
                                       avg_distance = mean(att_distance))
attendance_interest
attendance_local_event <- summarise_group(sea_reign_data, att_seaEvent,
                                          avg_distance = mean(att_distance))
attendance_local_event
attendance_promo_effect <- summarise_group(sea_reign_data, att_promo_effect,
                                           avg_distance = mean(att_distance))
attendance_promo_effect
attendance_2017 <- summarise_group(sea_reign_data, att_attend_2017,
                                   avg_distance = mean(att_distance))
attendance_2017
attendance_timing <- summarise_group(sea_reign_data, att_most_pref_time,
                                     avg_distance = mean(att_distance))
attendance_timing

# Sales summaries: mean 2016 spend on self / on others by ticket type
sales_ticket_type <- summarise_group(sea_reign_data, sales_ticket_type,
                                     avg_exp_self = mean(sales_exp_self2016),
                                     avg_exp_others = mean(sales_exp_oth2016))
sales_ticket_type
sales_ticket_type_2017 <- summarise_group(sea_reign_data, sales_tkt_type_2017,
                                          avg_exp_self = mean(sales_exp_self2016),
                                          avg_exp_others = mean(sales_exp_oth2016),
                                          numberofmatch_2017 = sum(sales_numofmatch_2017))
sales_ticket_type_2017

# Media and merchandise summaries: total matches planned for 2017 per group
media_web <- summarise_group(sea_reign_data, media_web,
                             numberofmatch_2017 = sum(sales_numofmatch_2017))
media_web
merchandise_sponsors <- summarise_group(sea_reign_data, mer_purchase_sponsors,
                                        numberofmatch_2017 = sum(sales_numofmatch_2017))
merchandise_sponsors
merchandise_reign <- summarise_group(sea_reign_data, mer_reign,
                                     numberofmatch_2017 = sum(sales_numofmatch_2017))
merchandise_reign
# Helper: horizontal bar chart of y summed within each level of x.
# (geom_bar(stat = "identity") sums y per x level -- these panels are bar
# charts, not histograms as the original comments said.) x_col / y_col are
# embraced with {{ }} so bare column names can be passed.
flipped_bar <- function(df, x_col, y_col) {
  df %>%
    ggplot(aes(x = {{ x_col }}, y = {{ y_col }})) +
    geom_bar(stat = "identity") +
    coord_flip()
}

# Quant variables grouped by when respondents first attended a Reign match
grid.arrange(
  flipped_bar(sea_reign_data, att_first_attend, att_distance),
  flipped_bar(sea_reign_data, att_first_attend, media_season_newspaper),
  flipped_bar(sea_reign_data, att_first_attend, media_season_tv),
  flipped_bar(sea_reign_data, att_first_attend, sales_numofmatch_2017),
  ncol = 2
)
# Quant variables grouped by other local-event attendance
grid.arrange(
  flipped_bar(sea_reign_data, att_seaEvent, att_distance),
  flipped_bar(sea_reign_data, att_seaEvent, media_season_newspaper),
  flipped_bar(sea_reign_data, att_seaEvent, media_season_tv),
  flipped_bar(sea_reign_data, att_seaEvent, sales_numofmatch_2017),
  ncol = 2
)
# Quant variables grouped by most preferred match timing
grid.arrange(
  flipped_bar(sea_reign_data, att_most_pref_time, att_distance),
  flipped_bar(sea_reign_data, att_most_pref_time, sales_numofmatch_2017),
  ncol = 2
)
# Quant variables grouped by ticket type
grid.arrange(
  flipped_bar(sea_reign_data, sales_ticket_type, att_distance),
  flipped_bar(sea_reign_data, sales_ticket_type, media_season_newspaper),
  flipped_bar(sea_reign_data, sales_ticket_type, media_season_tv),
  flipped_bar(sea_reign_data, sales_ticket_type, sales_numofmatch_2017),
  ncol = 2
)
# Quant variables grouped by web media channel
grid.arrange(
  flipped_bar(sea_reign_data, media_web, att_distance),
  flipped_bar(sea_reign_data, media_web, media_season_newspaper),
  flipped_bar(sea_reign_data, media_web, media_season_tv),
  flipped_bar(sea_reign_data, media_web, sales_numofmatch_2017),
  ncol = 2
)
# Spend and planned matches grouped by merchandise buying preference
grid.arrange(
  flipped_bar(sea_reign_data, mer_buy_pref, sales_exp_self2016),
  flipped_bar(sea_reign_data, mer_buy_pref, sales_exp_oth2016),
  flipped_bar(sea_reign_data, mer_buy_pref, sales_numofmatch_2017),
  ncol = 2
)
# Descriptive statistics for the trimmed EDA data set
summary(sea_reign_data)
att_first_attend att_interest att_distance att_promo_effect att_seaEvent
Attended_2013:109 Die_heart_fan: 29 Min. : 0.0 Negative influence: 1 NoAnswer : 56
Attended_2014:113 Less_interest: 1 1st Qu.: 5.0 No influence : 85 Rat City Rollergirls: 4
Attended_2015:123 Low_level_fan: 25 Median :10.0 Positive influence:162 Seattle Mariners : 27
Attended_2016: 69 Loyal_fan :241 Mean :15.8 Unknown :166 Seattle Seahawks : 59
NotAttended : 0 Moderate_fan :118 3rd Qu.:20.0 Seattle Sounders :159
Max. :80.0 Seattle Storm :108
Sounders Women : 1
att_most_pref_time att_attend_2017 att_attend_2016 att_gamewith sales_ticket_type
Sunday Evenings :248 0: 41 0: 58 Family :306 3 Match Pack : 5
Sunday Afternoons 4pm : 54 1:373 1:356 Friends : 90 5 Match Pack : 17
NoPreference : 35 I attend games by myself: 18 I don't know : 26
Saturday Afternoons 4pm: 21 NotApplicable : 0 Pitchside Table: 2
Wed Evenings : 21 Season Ticket :134
Saturday Evenings : 17 Single Match :230
(Other) : 18
sales_exp_self2016 sales_exp_oth2016 sales_numofmatch_2017 sales_tkt_type_2017 media_web cost
Min. : 0.00 Min. : 0.00 Min. : 0.000 3 Match Pack : 21 Facebook : 47 Costly :344
1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 2.000 5 Match Pack : 30 Instragram : 5 Not Costly: 70
Median : 25.00 Median : 20.00 Median : 5.000 Pitchside Table: 4 NotAvailable: 56
Mean : 43.56 Mean : 39.47 Mean : 5.804 Season Ticket :134 Twitter : 16
3rd Qu.: 60.00 3rd Qu.: 60.00 3rd Qu.:10.000 Single Match :186 Website :275
Max. :250.00 Max. :200.00 Max. :16.000 Undecided : 39 Youtube : 15
media_season_newspaper media_season_tv mer_buy_pref mer_purchase_sponsors mer_reign
Min. :0.000 Min. : 0.000 At Store : 26 BECU : 45 Accessories : 22
1st Qu.:0.000 1st Qu.: 0.000 During Match:116 Carter Subaru : 20 Jackets : 16
Median :1.000 Median : 1.000 Neutral :130 Kraken Congee : 8 Long_Sleeve_Shirt : 47
Mean :1.401 Mean : 1.874 Online :142 Microsoft :154 No preferences : 73
3rd Qu.:2.000 3rd Qu.: 3.000 Not sure : 97 Short_Sleeve_Shirt:256
Max. :5.000 Max. :10.000 Pepsi : 80
Ruffneck Scarves: 10
dem_gender dem_sexualOrient dem_marital_status dem_education
Female :252 Bisexual : 20 Married/Partnered:286 College Graduate :182
Male :157 Gay/Lesbian : 76 Other : 3 Graduate Degree :177
Prefer not to answer: 5 Heterosexual :297 Single :125 High School Graduate: 7
Other : 8 Other : 2
Prefer not to answer: 13 Some College : 46
sat_gameday satisfacn_perf
Neutral :325 Not Satisfied : 0
Dissatisfying beer service: 25 Not Satisfied_skill : 2
Satisfactory food price : 22 Satisfied_with_cost : 3
Satisfactory quality : 13 Satisfied_with_drama : 6
Satisfactory beer service : 7 Satisfied_with_skill :393
Dissatisfying food price : 6 Satisfied_with_success: 10
(Other) : 16
# Histograms of the four quantitative variables, arranged in a 2x2 grid
hist_distance <- sea_reign_data %>%
  ggplot(aes(att_distance)) +
  geom_histogram()
hist_matches <- sea_reign_data %>%
  ggplot(aes(sales_numofmatch_2017)) +
  geom_histogram()
hist_spend_self <- sea_reign_data %>%
  ggplot(aes(sales_exp_self2016)) +
  geom_histogram()
hist_spend_others <- sea_reign_data %>%
  ggplot(aes(sales_exp_oth2016)) +
  geom_histogram()
grid.arrange(hist_distance, hist_matches, hist_spend_self, hist_spend_others,
             ncol = 2)
# Boxplots of the quantitative variables in a 2x2 base-graphics grid.
# Save the previous graphics settings and restore them afterwards so the
# 2x2 layout does not leak into later base plots (par() returns the old
# values of the parameters being changed).
old_par <- par(mfrow = c(2, 2))
boxplot(sea_reign_data$att_distance, main = "Attendance_distance_travelled")
boxplot(sea_reign_data$sales_numofmatch_2017, main = "Sales_NumberOfMatches")
boxplot(sea_reign_data$sales_exp_self2016, main = "Sales_Expenditure_Self2016")
boxplot(sea_reign_data$sales_exp_oth2016, main = "Sales_Expenditure_Others2016")
par(old_par)
Cross-tabs are used to tabulate pairs of categorical variables
addmargins(xtabs(~ att_first_attend + att_interest, data = sea_reign_data))
att_interest
att_first_attend Die_heart_fan Less_interest Low_level_fan Loyal_fan Moderate_fan Sum
Attended_2013 15 0 2 72 20 109
Attended_2014 7 1 6 64 35 113
Attended_2015 6 0 7 73 37 123
Attended_2016 1 0 10 32 26 69
NotAttended 0 0 0 0 0 0
Sum 29 1 25 241 118 414
addmargins(xtabs(~ att_first_attend + att_promo_effect, data = sea_reign_data))
att_promo_effect
att_first_attend Negative influence No influence Positive influence Unknown Sum
Attended_2013 0 21 41 47 109
Attended_2014 1 22 43 47 113
Attended_2015 0 24 50 49 123
Attended_2016 0 18 28 23 69
NotAttended 0 0 0 0 0
Sum 1 85 162 166 414
addmargins(xtabs(~ att_first_attend + att_most_pref_time, data = sea_reign_data))
att_most_pref_time
att_first_attend Friday Evenings NoPreference Saturday Afternoons 1pm Saturday Afternoons 4pm Saturday Evenings
Attended_2013 1 10 6 10 7
Attended_2014 1 8 2 5 4
Attended_2015 5 13 1 5 2
Attended_2016 1 4 1 1 4
NotAttended 0 0 0 0 0
Sum 8 35 10 21 17
att_most_pref_time
att_first_attend Sunday Afternoons 4pm Sunday Evenings Wed Evenings Sum
Attended_2013 19 49 7 109
Attended_2014 15 73 5 113
Attended_2015 13 81 3 123
Attended_2016 7 45 6 69
NotAttended 0 0 0 0
Sum 54 248 21 414
addmargins(xtabs(~ sales_numofmatch_2017 + media_web, data = sea_reign_data))
media_web
sales_numofmatch_2017 Facebook Instragram NotAvailable Twitter Website Youtube Sum
0 0 0 6 0 7 3 16
1 3 0 11 1 11 1 27
2 12 0 10 2 38 1 63
3 4 1 9 3 49 2 68
4 2 0 4 1 19 1 27
5 8 1 6 1 34 4 54
6 3 1 1 0 11 0 16
7 0 0 0 0 3 0 3
8 1 0 1 1 15 0 18
9 0 0 2 0 2 0 4
10 4 0 1 2 21 1 29
11 0 0 0 0 2 0 2
12 9 2 5 5 58 2 81
14 0 0 0 0 4 0 4
15 0 0 0 0 1 0 1
16 1 0 0 0 0 0 1
Sum 47 5 56 16 275 15 414
addmargins(xtabs(~ sales_numofmatch_2017 + mer_buy_pref, data = sea_reign_data))
mer_buy_pref
sales_numofmatch_2017 At Store During Match Neutral Online Sum
0 0 1 7 8 16
1 3 6 9 9 27
2 2 14 23 24 63
3 5 16 24 23 68
4 1 11 8 7 27
5 3 18 14 19 54
6 1 8 3 4 16
7 0 3 0 0 3
8 0 6 5 7 18
9 0 1 2 1 4
10 1 10 5 13 29
11 0 1 1 0 2
12 10 19 27 25 81
14 0 1 2 1 4
15 0 1 0 0 1
16 0 0 0 1 1
Sum 26 116 130 142 414
addmargins(xtabs(~ sales_numofmatch_2017 + mer_purchase_sponsors, data = sea_reign_data))
mer_purchase_sponsors
sales_numofmatch_2017 BECU Carter Subaru Kraken Congee Microsoft Not sure Pepsi Ruffneck Scarves Sum
0 0 1 0 4 9 2 0 16
1 0 1 1 8 9 7 1 27
2 5 1 1 27 19 10 0 63
3 6 4 0 21 20 13 4 68
4 4 0 0 11 5 6 1 27
5 6 3 0 24 11 9 1 54
6 2 2 0 5 3 4 0 16
7 1 0 0 1 0 1 0 3
8 1 1 0 7 2 6 1 18
9 1 0 0 2 1 0 0 4
10 2 2 2 9 6 8 0 29
11 1 0 1 0 0 0 0 2
12 14 5 3 31 12 14 2 81
14 1 0 0 3 0 0 0 4
15 1 0 0 0 0 0 0 1
16 0 0 0 1 0 0 0 1
Sum 45 20 8 154 97 80 10 414
# Cross-tab: planned 2017 match count vs. gender, with marginal sums
addmargins(xtabs(~ sales_numofmatch_2017 + dem_gender, data = sea_reign_data))
dem_gender
sales_numofmatch_2017 Female Male Prefer not to answer Sum
0 8 8 0 16
1 15 12 0 27
2 30 33 0 63
3 42 25 1 68
4 19 8 0 27
5 26 27 1 54
6 13 3 0 16
7 3 0 0 3
8 11 7 0 18
9 2 2 0 4
10 20 8 1 29
11 2 0 0 2
12 57 22 2 81
14 2 2 0 4
15 1 0 0 1
16 1 0 0 1
Sum 252 157 5 414
# Cross-tab: planned 2017 match count vs. marital status, with marginal sums
addmargins(xtabs(~ sales_numofmatch_2017 + dem_marital_status, data = sea_reign_data))
dem_marital_status
sales_numofmatch_2017 Married/Partnered Other Single Sum
0 11 0 5 16
1 22 0 5 27
2 49 0 14 63
3 53 0 15 68
4 19 0 8 27
5 34 1 19 54
6 9 0 7 16
7 2 0 1 3
8 11 0 7 18
9 3 0 1 4
10 19 1 9 29
11 1 0 1 2
12 49 1 31 81
14 2 0 2 4
15 1 0 0 1
16 1 0 0 1
Sum 286 3 125 414
# Cross-tab: planned 2017 match count vs. education level, with marginal sums
addmargins(xtabs(~ sales_numofmatch_2017 + dem_education, data = sea_reign_data))
dem_education
sales_numofmatch_2017 College Graduate Graduate Degree High School Graduate Other Some College Sum
0 10 6 0 0 0 16
1 7 15 0 0 5 27
2 19 32 2 0 10 63
3 27 32 1 1 7 68
4 10 14 0 0 3 27
5 25 18 4 1 6 54
6 8 8 0 0 0 16
7 3 0 0 0 0 3
8 11 6 0 0 1 18
9 0 4 0 0 0 4
10 11 13 0 0 5 29
11 1 1 0 0 0 2
12 46 26 0 0 9 81
14 2 2 0 0 0 4
15 1 0 0 0 0 1
16 1 0 0 0 0 1
Sum 182 177 7 2 46 414
# Proportions might be more informative
# Same cross-tab as above expressed as overall proportions, rounded to 3 dp;
# rounding means the Sum row/column may not total exactly 1 (e.g. 0.998)
addmargins(round(prop.table(xtabs(~ att_first_attend + att_interest, data = sea_reign_data)), 3))
att_interest
att_first_attend Die_heart_fan Less_interest Low_level_fan Loyal_fan Moderate_fan Sum
Attended_2013 0.036 0.000 0.005 0.174 0.048 0.263
Attended_2014 0.017 0.002 0.014 0.155 0.085 0.273
Attended_2015 0.014 0.000 0.017 0.176 0.089 0.296
Attended_2016 0.002 0.000 0.024 0.077 0.063 0.166
NotAttended 0.000 0.000 0.000 0.000 0.000 0.000
Sum 0.069 0.002 0.060 0.582 0.285 0.998
Use a tile plot to show relationships between multiple categorical variables
# Note have to calculate and provide a variable for filling the graph
# Helper: count each combination of two categorical columns and draw a tile
# (heat-map) plot. fill = -count so larger counts render as darker tiles.
# count() replaces the duplicated group_by() + summarise(n()) pipelines and
# removes the 4x copy-paste of the plotting code.
plot_tile <- function(df, xvar, yvar) {
  df %>%
    count({{ xvar }}, {{ yvar }}, name = "count") %>%
    ggplot(aes({{ xvar }}, {{ yvar }})) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
    geom_tile(aes(fill = -count))
}
plot_tile(sea_reign_data, att_most_pref_time, att_attend_2016)
plot_tile(sea_reign_data, sales_tkt_type_2017, att_attend_2017)
plot_tile(sea_reign_data, media_web, att_attend_2017)
plot_tile(sea_reign_data, mer_buy_pref, att_attend_2016)
# Use ggpairs from the GGally package
# Pairwise Pearson correlations among distance travelled, 2016 self-spend,
# and planned 2017 match count
cor_plot <- select(sea_reign_data,
                   att_distance, sales_exp_self2016, sales_numofmatch_2017)
cor(cor_plot)
att_distance sales_exp_self2016 sales_numofmatch_2017
att_distance 1.00000000 0.05155513 -0.102486
sales_exp_self2016 0.05155513 1.00000000 0.340335
sales_numofmatch_2017 -0.10248605 0.34033502 1.000000
# Pairwise plot
# Scatterplot matrix of the same three numeric measures, via GGally::ggpairs
# and psych::pairs.panels
pairs_plot <- select(sea_reign_data,
                     att_distance, sales_exp_self2016, sales_numofmatch_2017)
ggpairs(pairs_plot)
pairs.panels(pairs_plot)
With more people likely to attend matches in 2017, there is a chance that they will spend more on merchandise, as they did in 2016
# Standard scatter plot to visualize what is the revenue when producers receive maximum promos and ads
# FIX: the original layered geom_point() AND geom_jitter(), drawing every
# observation twice; jitter alone suffices. The original also applied
# scale_fill_brewer() although only the colour aesthetic is mapped — replaced
# with scale_color_brewer() so the palette actually takes effect.
grid.arrange(
  sea_reign_data %>%
    ggplot(aes(x = att_attend_2016, y = sales_numofmatch_2017, color = dem_gender)) +
    geom_jitter() +
    geom_smooth(method = "lm", se = FALSE),
  sea_reign_data %>%
    ggplot(aes(x = att_attend_2017, y = sales_numofmatch_2017, color = dem_gender)) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
    geom_jitter() +
    scale_color_brewer(palette = "Reds") +
    geom_smooth(method = "lm", se = FALSE),
  ncol = 2
)
# Form table of MEAN distances travelled (the old comment said "medians",
# but mean(att_distance) is what is computed), by buying preference x gender.
# Attendance when 2016 = 1
attendance_2016 <- sea_reign_data %>%
  filter(att_attend_2016 == 1) %>%  # filter before grouping (cheaper, clearer)
  group_by(mer_buy_pref, dem_gender) %>%
  summarise(Sum_distance = mean(att_distance)) %>%
  # pivot_wider() replaces the superseded spread()
  pivot_wider(names_from = dem_gender, values_from = Sum_distance) %>%
  as.data.frame()
attendance_2016
Clustered barplot comparing distance travelled, expenditure on self, and number of matches planned for 2017 across merchandise buying preferences
# First get the data and name the graph so we can build it by layer
# Helper: mean of one or more numeric columns by merchandise buying
# preference, reshaped long so each measure becomes a "Variable"/"Mean" pair
# for bar plotting. Replaces deprecated funs() with across() and the
# superseded gather() with pivot_longer(), and removes 5x duplication.
mean_by_pref <- function(df, ...) {
  df %>%
    select(..., mer_buy_pref) %>%
    group_by(mer_buy_pref) %>%
    summarise(across(everything(), mean)) %>%
    pivot_longer(-mer_buy_pref, names_to = "Variable", values_to = "Mean")
}
merchandise_data1 <- mean_by_pref(sea_reign_data, att_distance)
merchandise_data2 <- mean_by_pref(sea_reign_data, sales_exp_self2016)
merchandise_data3 <- mean_by_pref(sea_reign_data, sales_exp_oth2016)
merchandise_data4 <- mean_by_pref(sea_reign_data, sales_numofmatch_2017)
merchandise_data5 <- mean_by_pref(sea_reign_data, att_distance,
                                  sales_exp_self2016, sales_numofmatch_2017)
merchandise_data1
merchandise_data2
merchandise_data3
merchandise_data4
# Now set frame for plot, notice new variables "Variable" and "Mean"
# Build each dodged bar chart first, then arrange all four in a grid;
# geom_col() is shorthand for geom_bar(stat = "identity").
merchandise_plot1 <- ggplot(merchandise_data1,
                            aes(x = Variable, y = Mean, fill = mer_buy_pref)) +
  scale_fill_brewer(palette = "PuBuGn") +
  geom_col(position = "dodge")
merchandise_plot2 <- ggplot(merchandise_data2,
                            aes(x = Variable, y = Mean, fill = mer_buy_pref)) +
  scale_fill_brewer(palette = "PuBuGn") +
  geom_col(position = "dodge")
merchandise_plot3 <- ggplot(merchandise_data3,
                            aes(x = Variable, y = Mean, fill = mer_buy_pref)) +
  scale_fill_brewer(palette = "PuBuGn") +
  geom_col(position = "dodge")
merchandise_plot4 <- ggplot(merchandise_data4,
                            aes(x = Variable, y = Mean, fill = mer_buy_pref)) +
  scale_fill_brewer(palette = "PuBuGn") +
  geom_col(position = "dodge")
grid.arrange(merchandise_plot1, merchandise_plot2,
             merchandise_plot3, merchandise_plot4)
# Combined plot kept unstyled here; title/labels are layered on later
merchandise_plot5 <- ggplot(merchandise_data5,
                            aes(x = Variable, y = Mean, fill = mer_buy_pref)) +
  geom_col(position = "dodge")
Clustered barplot comparing distance travelled, expenditure on self, and number of matches planned for 2017 across media channels
# Helper: mean of one or more numeric columns by preferred media channel,
# reshaped long ("Variable"/"Mean") for bar plotting. Replaces deprecated
# funs() with across() and superseded gather() with pivot_longer(), and
# removes the 5x duplicated pipeline.
mean_by_media <- function(df, ...) {
  df %>%
    select(..., media_web) %>%
    group_by(media_web) %>%
    summarise(across(everything(), mean)) %>%
    pivot_longer(-media_web, names_to = "Variable", values_to = "Mean")
}
media_data1 <- mean_by_media(sea_reign_data, att_distance)
media_data2 <- mean_by_media(sea_reign_data, sales_exp_self2016)
media_data3 <- mean_by_media(sea_reign_data, sales_exp_oth2016)
media_data4 <- mean_by_media(sea_reign_data, sales_numofmatch_2017)
media_data5 <- mean_by_media(sea_reign_data, att_distance,
                             sales_exp_self2016, sales_numofmatch_2017)
media_data1
media_data2
media_data3
media_data4
# Set frame for plot, notice new variables "Variable" and "Mean"
# Build each dodged bar chart first, then arrange all four in a grid;
# geom_col() is shorthand for geom_bar(stat = "identity").
media_plot1 <- ggplot(media_data1,
                      aes(x = Variable, y = Mean, fill = media_web)) +
  scale_fill_brewer(palette = "PuBuGn") +
  geom_col(position = "dodge")
media_plot2 <- ggplot(media_data2,
                      aes(x = Variable, y = Mean, fill = media_web)) +
  scale_fill_brewer(palette = "PuBuGn") +
  geom_col(position = "dodge")
media_plot3 <- ggplot(media_data3,
                      aes(x = Variable, y = Mean, fill = media_web)) +
  scale_fill_brewer(palette = "PuBuGn") +
  geom_col(position = "dodge")
media_plot4 <- ggplot(media_data4,
                      aes(x = Variable, y = Mean, fill = media_web)) +
  scale_fill_brewer(palette = "PuBuGn") +
  geom_col(position = "dodge")
grid.arrange(media_plot1, media_plot2, media_plot3, media_plot4)
# Combined plot kept unstyled here; title/labels are layered on later
media_plot5 <- ggplot(media_data5,
                      aes(x = Variable, y = Mean, fill = media_web)) +
  geom_col(position = "dodge")
Let's add a title and some bar labels
# Add labels to each bar
# Layer value labels, title, palette and a cleaned-up theme onto the
# combined merchandise_data5 bar chart; position_dodge(.9) keeps each
# label aligned over its dodged bar.
merchandise_plot <- merchandise_plot5 + geom_text(aes(label = round(Mean, 1)), vjust = -0.5, color = "black",
position = position_dodge(.9), size = 4) +
# Add a main title
ggtitle("Comparison of distance,expenditure and matches in 2017 by buying preference",
subtitle = "Highest mean expenditure on self in 2016 season is at store\nHighest mean distance travelled to purchase merchandise in store") +
scale_fill_brewer(palette = "PuBuGn") +
theme(axis.ticks.x = element_blank(), # Use theme to clean up the graph to make more professional
axis.title.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
panel.background = element_blank())
# Call the graph
merchandise_plot
# Add labels to each bar
# Same labelling/theming treatment, applied to the media_data5 chart.
# NOTE(review): the title/subtitle below say "by buying preference" and talk
# about store expenditure although this plot is by media channel — looks
# copy-pasted from the merchandise plot above; confirm intended wording.
media_plot <- media_plot5 + geom_text(aes(label = round(Mean, 1)), vjust = -0.5, color = "black",
position = position_dodge(.9), size = 3) +
# Add a main title
ggtitle("Comparison of distance,expenditure and matches in 2017 by buying preference",
subtitle = "Highest mean expenditure on self in 2016 season is at store\nHighest mean distance travelled to purchase merchandise in store") +
scale_fill_brewer(palette = "PuBuGn") +
theme(axis.ticks.x = element_blank(), # Use theme to clean up the graph to make more professional
axis.title.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
panel.background = element_blank())
# Call the graph
media_plot
# Doing a subset of the table to just show variables of interest
# FIX: "Saturday Afternoons 4pm" was listed twice; %in% treats duplicates
# identically, so removing one entry does not change the filter result.
timings_subset <- c("Wed Evenings", "Sunday Evenings", "Sunday Afternoons 4pm",
                    "Friday Evenings", "Saturday Evenings",
                    "Saturday Afternoons 4pm", "Saturday Afternoons 1pm")
past_year_attendance <- sea_reign_data %>%
  filter(att_most_pref_time %in% timings_subset)
# Mean distance travelled by preferred match time and first-attendance year
attendance_plot <- past_year_attendance %>%
  group_by(att_most_pref_time, att_first_attend) %>%
  summarize(avg_distance = mean(att_distance))
# Horizontal dodged bars, ordered by average distance
ggplot(data = attendance_plot,
       aes(x = reorder(att_most_pref_time, avg_distance), y = avg_distance,
           fill = att_first_attend)) +
  scale_fill_brewer(palette = "PuBuGn") +
  geom_col(position = "dodge") +
  coord_flip()
# Doing a subset of the table to just show variables of interest
# FIX: duplicate "Saturday Afternoons 4pm" entry removed (no behavior change
# — %in% ignores duplicates in the lookup vector).
timings_subset <- c("Wed Evenings", "Sunday Evenings", "Sunday Afternoons 4pm",
                    "Friday Evenings", "Saturday Evenings",
                    "Saturday Afternoons 4pm", "Saturday Afternoons 1pm")
next_year_attendance <- sea_reign_data %>%
  #filter(att_attend_2017 %in% attend_2017_subset) %>%
  filter(att_most_pref_time %in% timings_subset)
# Mean distance travelled by preferred match time and 2017 attendance intent
attendance_plot1 <- next_year_attendance %>%
  group_by(att_most_pref_time, att_attend_2017) %>%
  summarize(avg_distance = mean(att_distance))
ggplot(data = attendance_plot1,
       aes(x = reorder(att_most_pref_time, avg_distance), y = avg_distance,
           fill = att_attend_2017)) +
  scale_fill_brewer(palette = "PuBuGn") +
  geom_col(position = "dodge") +
  coord_flip()
# Doing a subset of the table to just show variables of interest
gamewith_subset <- c("3 Match Pack", "Single Match", "I don't know",
                     "5 Match Pack", "Season Ticket")
ticket_type <- filter(sea_reign_data, sales_ticket_type %in% gamewith_subset)
# Mean distance travelled by ticket type and who respondents attend with
attendance_plot2 <- ticket_type %>%
  group_by(sales_ticket_type, att_gamewith) %>%
  summarize(avg_distance = mean(att_distance))
# Horizontal dodged bars, ordered by average distance
attendance_plot2 %>%
  ggplot(aes(x = reorder(sales_ticket_type, avg_distance), y = avg_distance,
             fill = att_gamewith)) +
  scale_fill_brewer(palette = "PuBuGn") +
  geom_col(position = "dodge") +
  coord_flip()
# Doing a subset of the table to just show variables of interest
# FIX: duplicate "Saturday Afternoons 4pm" entry removed (%in% is unaffected).
timings_subset <- c("Wed Evenings", "Sunday Evenings", "Sunday Afternoons 4pm",
                    "Friday Evenings", "Saturday Evenings",
                    "Saturday Afternoons 4pm", "Saturday Afternoons 1pm")
attend_2017_subset <- c("Attend_2017", "Neutral")
# NOTE(review): next_year_attendance is recomputed here but not used below —
# confirm whether this is dead code or consumed in a later chunk
next_year_attendance <- sea_reign_data %>%
  filter(att_attend_2017 %in% attend_2017_subset) %>%
  filter(att_most_pref_time %in% timings_subset)
# NOTE(review): this silently overwrites the merchandise_plot ggplot built
# earlier with a summary data frame — consider a distinct name
merchandise_plot <- sea_reign_data %>%
  group_by(mer_reign, mer_buy_pref) %>%
  summarize(avg_expenditure = mean(sales_exp_self2016))
ggplot(data = merchandise_plot,
       aes(x = reorder(mer_reign, avg_expenditure), y = avg_expenditure,
           fill = mer_buy_pref)) +
  scale_fill_brewer(palette = "PuBuGn") +
  geom_col(position = "dodge") +
  coord_flip()
mer_subset <- c("During Match", "Online", "At Store")
mer_reign_subset <- c("Short_Sleeve_Shirt", "Long_Sleeve_Shirt",
                      "Accessories", "Jackets")
# Restrict to the main buying channels and merchandise categories
merchandise <- sea_reign_data %>%
  filter(mer_buy_pref %in% mer_subset, mer_reign %in% mer_reign_subset)
# Styled summary table: spend by buying preference for 2016 attendees
merchandise %>%
  filter(att_attend_2016 == 1) %>%
  group_by(mer_buy_pref) %>%
  summarise(Count = n(),
            Average_Expenditure_Self = mean(sales_exp_self2016),
            Average_Expenditure_Others = mean(sales_exp_oth2016)) %>%
  arrange(desc(Average_Expenditure_Self)) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
  row_spec(1, background = "#e5f5e0")
| mer_buy_pref | Count | Average_Expenditure_Self | Average_Expenditure_Others |
|---|---|---|---|
| During Match | 108 | 51.89815 | 45.87963 |
| Online | 120 | 50.12500 | 45.75000 |
| At Store | 23 | 47.60870 | 49.13043 |
Null hypothesis: average expenditure is the same across all merchandise buying preferences for people who attended the 2016 season
# Restrict the merchandise subset to respondents who attended in 2016
sea_hyp1 <- merchandise %>%
filter(att_attend_2016 == 1)
# Is expenditure the same across merchandises?
# Welch two-sample t-test: self-spend of "During Match" vs "At Store" buyers
t.test(sea_hyp1$sales_exp_self2016[sea_hyp1$mer_buy_pref == "During Match"],
sea_hyp1$sales_exp_self2016[sea_hyp1$mer_buy_pref == "At Store"])
Welch Two Sample t-test
data: sea_hyp1$sales_exp_self2016[sea_hyp1$mer_buy_pref == "During Match"] and sea_hyp1$sales_exp_self2016[sea_hyp1$mer_buy_pref == "At Store"]
t = 0.37525, df = 39.158, p-value = 0.7095
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-18.82901 27.40792
sample estimates:
mean of x mean of y
51.89815 47.60870
# Same 2016-attendee subset (recomputed; identical to the one above)
sea_hyp1 <- merchandise %>%
filter(att_attend_2016 == 1)
# Is expenditure the same across merchandises?
# Welch two-sample t-test: self-spend of "During Match" vs "Online" buyers
t.test(sea_hyp1$sales_exp_self2016[sea_hyp1$mer_buy_pref == "During Match"],
sea_hyp1$sales_exp_self2016[sea_hyp1$mer_buy_pref == "Online"])
Welch Two Sample t-test
data: sea_hyp1$sales_exp_self2016[sea_hyp1$mer_buy_pref == "During Match"] and sea_hyp1$sales_exp_self2016[sea_hyp1$mer_buy_pref == "Online"]
t = 0.2229, df = 222.46, p-value = 0.8238
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-13.90322 17.44951
sample estimates:
mean of x mean of y
51.89815 50.12500
#Graphing the Merchandise preferences and mean expenditure on self.
# 90% CI, get z-value for upper tail, use .95 since is one sided
z <- qnorm(.95)
# Incorporate CI into bar graph of means.
# FIX: scale_fill_brewer() removed — fill here is a constant parameter, not a
# mapped aesthetic, so the fill scale was silently ignored.
sea_hyp1 %>%
  group_by(mer_buy_pref) %>%
  summarise(m = mean(sales_exp_self2016), sd = sd(sales_exp_self2016),
            n = n(), ci = z * sd / sqrt(n)) %>%
  ggplot(aes(x = reorder(mer_buy_pref, m), y = m)) +
  geom_col(position = "dodge", fill = "#9ecae1") +
  geom_errorbar(aes(ymin = m - ci, ymax = m + ci),
                width = 0.5, position = position_dodge(0.9)) +
  theme_classic() +
  xlab("") +
  ylab("") +
  theme(axis.ticks.x = element_blank(),
        axis.title.x = element_blank(),
        axis.ticks.y = element_blank()) +
  ggtitle("Merchandise preferences and mean expenditure on self",
          subtitle = "Maximum expenditure during match")
mer_reign_subset <- c("Short_Sleeve_Shirt","Long_Sleeve_Shirt","Accessories","Jackets")
# Restrict to the four main merchandise categories
mer_t2 <- sea_reign_data %>%
filter(mer_reign %in% mer_reign_subset)
# Respondents who prefer buying during a match
sea_dm <- mer_t2 %>%
filter(mer_buy_pref == "During Match")
# Create a table of merchandise categories and their mean self/other spend
# for 2016 attendees who buy during a match (fixed leftover "General Mills
# cereals" comment copied from a template)
merchandise_t2 <- sea_dm %>%
filter(att_attend_2016 == 1) %>%
group_by(mer_reign) %>%
summarise(Count = n(),
Average_Expenditure_Self = mean(sales_exp_self2016),
Average_Expenditure_Others = mean(sales_exp_oth2016)) %>%
arrange(desc(Average_Expenditure_Self)) %>%
kable() %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
row_spec(1, background = "#fee0d2")
merchandise_t2
| mer_reign | Count | Average_Expenditure_Self | Average_Expenditure_Others |
|---|---|---|---|
| Short_Sleeve_Shirt | 77 | 57.66234 | 46.62338 |
| Long_Sleeve_Shirt | 16 | 49.37500 | 45.62500 |
| Jackets | 7 | 37.14286 | 45.71429 |
| Accessories | 8 | 14.37500 | 39.37500 |
Null hypothesis: average expenditure on self is the same with and without promos across all merchandise
# Is expenditure the same whether running a promo or not?
# Welch t-test on self-spend: "No influence" vs every other promo response
# (the two groups are disjoint, so the comparison is valid)
t.test(sea_dm$sales_exp_self2016[sea_dm$att_promo_effect == "No influence"],
sea_dm$sales_exp_self2016[sea_dm$att_promo_effect != "No influence"])
Welch Two Sample t-test
data: sea_dm$sales_exp_self2016[sea_dm$att_promo_effect == "No influence"] and sea_dm$sales_exp_self2016[sea_dm$att_promo_effect != "No influence"]
t = 0.15608, df = 21.509, p-value = 0.8774
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-32.41223 37.68049
sample estimates:
mean of x mean of y
54.11765 51.48352
# Is expenditure the same whether running a promo or not?
# Same comparison as above, but for expenditure on others
t.test(sea_dm$sales_exp_oth2016[sea_dm$att_promo_effect == "No influence"],
sea_dm$sales_exp_oth2016[sea_dm$att_promo_effect != "No influence"])
Welch Two Sample t-test
data: sea_dm$sales_exp_oth2016[sea_dm$att_promo_effect == "No influence"] and sea_dm$sales_exp_oth2016[sea_dm$att_promo_effect != "No influence"]
t = 1.1049, df = 22.022, p-value = 0.2811
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-13.16504 43.19090
sample estimates:
mean of x mean of y
58.52941 43.51648
#Graphing mean promotional expenditure across Merchandise.
# 90% CI, get z-value for upper tail, use .95 since is one sided
z <- qnorm(.95)
# Incorporate CI into bar graph of means
mer_t2 %>%
mutate(promo = if_else(att_promo_effect == "Positive influence", "Positive", "None")) %>%
filter(att_attend_2016 == 1) %>%
# mutate(gender = if_else(dem_gender == "Female", "Female", "Male")) %>%
group_by(mer_reign, promo) %>%
# NOTE(review): sd is divided by 20 here, shrinking the plotted error bars to
# 1/20 of the true CI width — confirm whether this was intentional display
# scaling; the subtitle's effectiveness claim depends on it
summarise(m = mean(sales_exp_self2016), sd = sd(sales_exp_self2016)/20,
n = n(), ci = z * sd/sqrt(n)) %>%
ggplot(aes(x = reorder(mer_reign, m), y = m, fill = promo)) +
geom_bar(stat = "identity", position = "dodge") +
geom_errorbar(aes(ymin = m - ci, ymax = m + ci),
width = 0.5, position = position_dodge(0.9)) +
coord_flip() +
theme_classic() +
xlab("") +
labs(fill = "Promo") +
scale_fill_brewer() +
theme(axis.text = element_text(face = "bold", size = 11),
axis.ticks.x = element_blank(),
axis.title.x = element_blank(),
axis.title = element_blank(),
legend.position = "top",
legend.title = element_text(face = "bold")) +
ggtitle("Mean expenditure on self during match for Season 2016 ",
subtitle = "Promos are effective for short sleeve shirts\nand Accessories")
# Dodged bar chart of mean 2016 self-spend by merchandise category and
# promo influence, saved for reuse (e.g. in a dashboard).
base_plot2 <-
  mer_t2 %>%
  mutate(promo = if_else(att_promo_effect == "Positive influence", "Positive", "None")) %>%
  filter(att_attend_2016 == 1) %>%
  group_by(mer_reign, promo) %>%
  # NOTE(review): sd/20 shrinks the derived ci far below the true CI width —
  # kept as-is for visual consistency with the plot above; confirm intent
  summarise(m = mean(sales_exp_self2016), sd = sd(sales_exp_self2016)/20,
            n = n(), ci = z * sd/sqrt(n)) %>%
  ggplot(aes(x = reorder(mer_reign, m), y = m, fill = promo)) +
  geom_col(position = "dodge") +
  coord_flip() +
  scale_y_continuous(limits = c(0, 60),
                     labels = dollar_format(prefix = "$")) +
  theme_classic() +
  xlab("") +
  labs(fill = "Promo") +
  theme(axis.text = element_text(face = "bold", size = 11),
        axis.title = element_blank(),
        axis.ticks.y = element_blank(),
        axis.line.y = element_blank(),
        axis.line.x = element_line(colour = "grey"),
        legend.position = "top",
        legend.title = element_text(face = "bold")) +
  scale_fill_brewer() +
  ggtitle("Mean expenditure during match for Season 2016 ",
          subtitle = "Promos are statistically effective for short sleeve shirts\nand Accessories")
# FIX: save.image() dumps the ENTIRE workspace into the file; persist only
# the plot object — load("base_plot2.RData") still restores base_plot2
save(base_plot2, file = "base_plot2.RData")
base_plot2
#Target brand
# Dodged bar chart of mean 2016 self-spend by merchandise category, split by
# sponsor brand (BECU / Microsoft / Pepsi vs all others).
baseplot3 <-
  mer_t2 %>%
  #mutate(gender = if_else(dem_gender == "Female", "Female", "Male")) %>%
  # case_when() replaces the hard-to-read triple-nested if_else()
  mutate(sponsor = case_when(mer_purchase_sponsors == "BECU" ~ "BECU",
                             mer_purchase_sponsors == "Microsoft" ~ "MSFT",
                             mer_purchase_sponsors == "Pepsi" ~ "Pepsi",
                             TRUE ~ "Others")) %>%
  filter(att_attend_2016 == 1) %>%
  group_by(mer_reign, sponsor) %>%
  # NOTE(review): sd/20 shrinks the derived ci — confirm intent (ci unused in
  # this plot, but kept to match the sibling pipelines)
  summarise(m = mean(sales_exp_self2016), sd = sd(sales_exp_self2016)/20,
            n = n(), ci = z * sd/sqrt(n)) %>%
  ggplot(aes(x = reorder(mer_reign, m), y = m, fill = sponsor)) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.8), width = 0.7) +
  coord_flip() +
  scale_y_continuous(limits = c(0, 60),
                     labels = dollar_format(prefix = "$")) +
  theme_classic() +
  xlab("") +
  labs(fill = "Sponsors") +
  theme(axis.text = element_text(face = "bold", size = 11),
        axis.title = element_blank(),
        axis.ticks.y = element_blank(),
        axis.line.y = element_blank(),
        axis.line.x = element_line(colour = "grey"),
        legend.position = "top",
        legend.title = element_text(face = "bold")) +
  scale_fill_brewer() +
  ggtitle("Popular sponsors during Season 2016 ",
          subtitle = "Pepsi is highest selling brand for Short Sleeve\nShirts and Jackets")
# FIX: save.image() saved the whole workspace; persist only the plot object
save(baseplot3, file = "baseplot3.RData")
baseplot3
As is evident from the graphs above, Short Sleeve Shirts and Accessories sell the most when promotions have a positive influence.
Long Sleeve Shirts and Jackets sell even without any promotional influence, which means it is important to focus the promotional strategy on products like Short Sleeve Shirts and Accessories.
mer_subset <- c("During Match", "Online", "At Store")
mer_reign_subset <- c("Short_Sleeve_Shirt", "Long_Sleeve_Shirt",
                      "Accessories", "Jackets")
# Main buying channels and merchandise categories only
merchandise_t1 <- sea_reign_data %>%
  filter(mer_buy_pref %in% mer_subset, mer_reign %in% mer_reign_subset)
# Panel 1 of 3: mean self-spend by category for "At Store" buyers
# (Accessories excluded); axis text/lines suppressed so it stacks cleanly
# above the other panels.
gStore <- sea_hyp1 %>%
  filter(mer_buy_pref == "At Store", mer_reign != "Accessories") %>%
  group_by(mer_buy_pref, mer_reign) %>%
  summarise(m = mean(sales_exp_self2016), sd = sd(sales_exp_self2016),
            n = n(), ci = z * sd / sqrt(n)) %>%
  ggplot(aes(x = reorder(mer_reign, m), y = m)) +
  geom_col(position = "dodge", fill = "#deebf7") +
  coord_flip() +
  scale_y_continuous(limits = c(0, 75)) +
  theme_classic() +
  ggtitle("Store") +
  theme(axis.line.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.line.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.title = element_blank(),
        axis.text.y = element_text(size = 9),
        axis.text.x = element_blank(),
        plot.title = element_text(hjust = -0.08, vjust = -0.30,
                                  colour = "black", face = "bold", size = 11),
        plot.margin = unit(c(0.2, 0.2, 0.2, 0.2), "cm"))
# Panel 2 of 3: mean self-spend by category for "Online" buyers
gOnline <- sea_hyp1 %>%
  filter(mer_buy_pref == "Online") %>%
  group_by(mer_buy_pref, mer_reign) %>%
  summarise(m = mean(sales_exp_self2016), sd = sd(sales_exp_self2016),
            n = n(), ci = z * sd / sqrt(n)) %>%
  ggplot(aes(x = reorder(mer_reign, m), y = m)) +
  geom_col(position = "dodge", fill = "#9ecae1") +
  coord_flip() +
  scale_y_continuous(limits = c(0, 75)) +
  theme_classic() +
  ggtitle("Online") +
  theme(axis.line.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.line.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.title = element_blank(),
        axis.text.y = element_text(size = 9),
        axis.text.x = element_blank(),
        plot.title = element_text(hjust = -0.08, vjust = -0.30,
                                  colour = "black", face = "bold", size = 11),
        plot.margin = unit(c(0.2, 0.2, 0.2, 0.2), "cm"))
# Panel 3 of 3: mean self-spend by category for "During Match" buyers;
# bottom panel, so it keeps the dollar-formatted x axis
gMatch <- sea_hyp1 %>%
  filter(mer_buy_pref == "During Match") %>%
  group_by(mer_buy_pref, mer_reign) %>%
  summarise(m = mean(sales_exp_self2016), sd = sd(sales_exp_self2016),
            n = n(), ci = z * sd / sqrt(n)) %>%
  ggplot(aes(x = reorder(mer_reign, m), y = m)) +
  geom_col(position = "dodge", fill = "#3182bd") +
  coord_flip() +
  scale_y_continuous(limits = c(0, 75),
                     labels = dollar_format(prefix = "$")) +
  theme_classic() +
  ggtitle("Match") +
  theme(axis.line.y = element_blank(),
        axis.title = element_blank(),
        axis.ticks.y = element_blank(),
        axis.line.x = element_line(colour = "grey"),
        axis.text = element_text(size = 9),
        legend.position = "bottom",
        plot.title = element_text(hjust = -0.08, vjust = -0.30,
                                  colour = "black", face = "bold", size = 11),
        plot.margin = unit(c(0.2, 0.2, 0.2, 0.2), "cm"))
# Stack the three channel panels into one labelled figure
base_plot1 <- cowplot::plot_grid(gStore, gOnline, gMatch,
                                 align = "v", nrow = 3,
                                 rel_heights = c(0.3, 0.5, 0.58),
                                 labels = "Average expenditure Season 2016",
                                 label_size = 12,
                                 hjust = -0.9,
                                 vjust = 1.7)
# FIX: save.image() saved the entire workspace; persist only the combined
# plot object — load("base_plot1.RData") still restores base_plot1
save(base_plot1, file = "base_plot1.RData")
base_plot1
# The palette with purples:
cbPalette <- c("#dddaed", "#c5b4e3", "#9578d3", "#7d55c7")
#Table for ticket type for effectiveness across the whole market.
ticket_subset <- c("3 Match Pack", "5 Match Pack", "Season Ticket", "Single Match")
ticket_t1 <- filter(sea_reign_data, sales_tkt_type_2017 %in% ticket_subset)
# Count and total planned 2017 matches per ticket type, top row highlighted
ticket_t1 %>%
  group_by(sales_tkt_type_2017) %>%
  summarise(Count = n(),
            Total_matches2017 = sum(sales_numofmatch_2017)) %>%
  arrange(desc(Total_matches2017)) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
  row_spec(1, background = "#e5f5e0")
| sales_tkt_type_2017 | Count | Total_matches2017 |
|---|---|---|
| Season Ticket | 134 | 1428 |
| Single Match | 186 | 560 |
| 5 Match Pack | 30 | 159 |
| 3 Match Pack | 21 | 75 |
Null hypothesis: people who plan to attend the 2017 season are equally likely to buy every ticket type
# Restrict to respondents planning to attend in 2017
sea_hyp2 <- ticket_t1 %>%
filter(att_attend_2017 == 1)
# Is ticket type same for everyone who is attending match in 2017?
# Welch t-test: planned match count, "5 Match Pack" vs "Season Ticket"
t.test(sea_hyp2$sales_numofmatch_2017[sea_hyp2$sales_tkt_type_2017 == "5 Match Pack"],
sea_hyp2$sales_numofmatch_2017[sea_hyp2$sales_tkt_type_2017 == "Season Ticket"])
Welch Two Sample t-test
data: sea_hyp2$sales_numofmatch_2017[sea_hyp2$sales_tkt_type_2017 == and sea_hyp2$sales_numofmatch_2017[sea_hyp2$sales_tkt_type_2017 == "5 Match Pack"] and "Season Ticket"]
t = -15.32, df = 62.313, p-value < 2.2e-16
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-5.939197 -4.568322
sample estimates:
mean of x mean of y
5.392857 10.646617
# Same 2017-attendee subset (recomputed; identical to the one above)
sea_hyp2 <- ticket_t1 %>%
filter(att_attend_2017 == 1)
# Is ticket type same for everyone who is attending match in 2017?
# Welch t-test: planned match count, "5 Match Pack" vs "Single Match"
t.test(sea_hyp2$sales_numofmatch_2017[sea_hyp2$sales_tkt_type_2017 == "5 Match Pack"],
sea_hyp2$sales_numofmatch_2017[sea_hyp2$sales_tkt_type_2017 == "Single Match"])
Welch Two Sample t-test
data: sea_hyp2$sales_numofmatch_2017[sea_hyp2$sales_tkt_type_2017 == and sea_hyp2$sales_numofmatch_2017[sea_hyp2$sales_tkt_type_2017 == "5 Match Pack"] and "Single Match"]
t = 6.9298, df = 44.918, p-value = 1.307e-08
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
1.534338 2.791749
sample estimates:
mean of x mean of y
5.392857 3.229814
# Same 2017-attendee subset (recomputed; identical to the one above)
sea_hyp2 <- ticket_t1 %>%
filter(att_attend_2017 == 1)
# Is ticket type same for everyone who is attending match in 2017?
# Welch t-test: planned match count, "5 Match Pack" vs "3 Match Pack"
t.test(sea_hyp2$sales_numofmatch_2017[sea_hyp2$sales_tkt_type_2017 == "5 Match Pack"],
sea_hyp2$sales_numofmatch_2017[sea_hyp2$sales_tkt_type_2017 == "3 Match Pack"])
Welch Two Sample t-test
data: sea_hyp2$sales_numofmatch_2017[sea_hyp2$sales_tkt_type_2017 == and sea_hyp2$sales_numofmatch_2017[sea_hyp2$sales_tkt_type_2017 == "5 Match Pack"] and "3 Match Pack"]
t = 3.5914, df = 31.398, p-value = 0.001107
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
0.8071108 2.9259720
sample estimates:
mean of x mean of y
5.392857 3.526316
# Graphing mean planned 2017 matches per ticket type, with 90% CIs
# (fixed leftover "in-store promotions and revenue across companies" comment
# copied from a template).
# 90% CI, get z-value for upper tail, use .95 since is one sided
z <- qnorm(.95)
# Incorporate CI into bar graph of means.
# FIX: scale_fill_manual() removed — fill is a constant parameter here, so
# the fill scale was silently ignored. Title typo "planning buy" fixed.
sea_hyp2 %>%
  group_by(sales_tkt_type_2017) %>%
  summarise(m = mean(sales_numofmatch_2017), sd = sd(sales_numofmatch_2017),
            n = n(), ci = z * sd / sqrt(n)) %>%
  ggplot(aes(x = reorder(sales_tkt_type_2017, m), y = m)) +
  geom_col(position = "dodge", fill = "#9ecae1") +
  geom_errorbar(aes(ymin = m - ci, ymax = m + ci),
                width = 0.5, position = position_dodge(0.9)) +
  theme_classic() +
  xlab("") +
  ylab("") +
  theme(axis.ticks.x = element_blank(),
        axis.title.x = element_blank(),
        axis.ticks.y = element_blank()) +
  ggtitle("Average number of tickets people are planning to buy for season 2017",
          subtitle = "Season ticket is likely to sell the most")
timings_subset <- c("Sunday Evenings", "Friday Evenings",
                    "Sunday Afternoons 4pm", "Saturday Afternoons 4pm",
                    "Saturday Afternoons 1pm")
ticket_t2 <- filter(sea_reign_data, att_most_pref_time %in% timings_subset)
# Season-ticket holders, flagged by whether promos had a positive influence
sea_st <- ticket_t2 %>%
  filter(sales_tkt_type_2017 == "Season Ticket") %>%
  mutate(promo = if_else(att_promo_effect == "Positive influence", "1", "0"))
#Create a table of most preferred timing when people are likely to attend a match in 2017 season
# (note: this reuses the ticket_t2 name, replacing the data frame above)
ticket_t2 <- sea_st %>%
  filter(att_attend_2017 == 1) %>%
  group_by(att_most_pref_time) %>%
  summarise(Count = n(),
            Total_matches2017 = sum(sales_numofmatch_2017)) %>%
  arrange(desc(Total_matches2017)) %>%
  kable() %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
  row_spec(5, background = "#fee0d2")
ticket_t2
| att_most_pref_time | Count | Total_matches2017 |
|---|---|---|
| Sunday Evenings | 72 | 763 |
| Sunday Afternoons 4pm | 25 | 276 |
| Saturday Afternoons 4pm | 14 | 146 |
| Saturday Afternoons 1pm | 4 | 46 |
| Friday Evenings | 3 | 31 |
Null hypothesis: the average number of tickets sold is the same at all timings, with or without promos, for the 2017 season
# Season-ticket holders positively influenced by promos
sea_hyp2 <- sea_st %>%
  filter(promo == 1)
# Is number of tickets sold same at all timings whether running a promo or not?
# FIX: the second group previously used `!= "Friday Evenings"`, which
# compared "Sunday Evenings" against a group that CONTAINS the Sunday
# Evenings rows themselves — overlapping samples invalidate a two-sample
# t-test. The intended pairwise comparison uses `==`.
t.test(sea_hyp2$sales_numofmatch_2017[sea_hyp2$att_most_pref_time == "Sunday Evenings"],
       sea_hyp2$sales_numofmatch_2017[sea_hyp2$att_most_pref_time == "Friday Evenings"])
Welch Two Sample t-test
data: sea_hyp2$sales_numofmatch_2017[sea_hyp2$att_most_pref_time == and sea_hyp2$sales_numofmatch_2017[sea_hyp2$att_most_pref_time != "Sunday Evenings"] and "Friday Evenings"]
t = 0.25035, df = 39.14, p-value = 0.8036
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-1.037852 1.331085
sample estimates:
mean of x mean of y
10.78947 10.64286
# Season-ticket holders positively influenced by promos (recomputed)
sea_hyp2 <- sea_st %>%
  filter(promo == 1)
# Is number of tickets sold same at all timings whether running a promo or not?
# FIX: `!=` made the second group overlap the first (it included the Sunday
# Evenings rows); `==` gives the intended pairwise comparison.
t.test(sea_hyp2$sales_numofmatch_2017[sea_hyp2$att_most_pref_time == "Sunday Evenings"],
       sea_hyp2$sales_numofmatch_2017[sea_hyp2$att_most_pref_time == "Saturday Afternoons 1pm"])
Welch Two Sample t-test
data: sea_hyp2$sales_numofmatch_2017[sea_hyp2$att_most_pref_time == and sea_hyp2$sales_numofmatch_2017[sea_hyp2$att_most_pref_time != "Sunday Evenings"] and "Saturday Afternoons 1pm"]
t = 0.54903, df = 39.441, p-value = 0.5861
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.8797946 1.5356650
sample estimates:
mean of x mean of y
10.78947 10.46154
# Season-ticket holders positively influenced by promos (recomputed)
sea_hyp2 <- sea_st %>%
  filter(promo == 1)
# Is number of tickets sold same at all timings whether running a promo or not?
# FIX: `!=` made the second group overlap the first (it included the Sunday
# Evenings rows); `==` gives the intended pairwise comparison.
t.test(sea_hyp2$sales_numofmatch_2017[sea_hyp2$att_most_pref_time == "Sunday Evenings"],
       sea_hyp2$sales_numofmatch_2017[sea_hyp2$att_most_pref_time == "Sunday Afternoons 4pm"])
Welch Two Sample t-test
data: sea_hyp2$sales_numofmatch_2017[sea_hyp2$att_most_pref_time == and sea_hyp2$sales_numofmatch_2017[sea_hyp2$att_most_pref_time != "Sunday Evenings"] and "Sunday Afternoons 4pm"]
t = 0.034634, df = 38.276, p-value = 0.9726
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-1.162708 1.203194
sample estimates:
mean of x mean of y
10.78947 10.76923
# Season-ticket holders positively influenced by promos (recomputed)
sea_hyp2 <- sea_st %>%
  filter(promo == 1)
# Is number of tickets sold same at all timings whether running a promo or not?
# FIX: `!=` made the second group overlap the first (it included the Sunday
# Evenings rows); `==` gives the intended pairwise comparison.
t.test(sea_hyp2$sales_numofmatch_2017[sea_hyp2$att_most_pref_time == "Sunday Evenings"],
       sea_hyp2$sales_numofmatch_2017[sea_hyp2$att_most_pref_time == "Saturday Afternoons 4pm"])
Welch Two Sample t-test
data: sea_hyp2$sales_numofmatch_2017[sea_hyp2$att_most_pref_time == and sea_hyp2$sales_numofmatch_2017[sea_hyp2$att_most_pref_time != "Sunday Evenings"] and "Saturday Afternoons 4pm"]
t = 0.10032, df = 38.34, p-value = 0.9206
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-1.125513 1.242922
sample estimates:
mean of x mean of y
10.78947 10.73077
# Graph mean predicted 2017 ticket counts across preferred match timings,
# split by whether promos have a positive influence.
# (The original comment referred to "General Mill branded cereals" — a
# copy-paste leftover from a different assignment.)
# z multiplier for a 90% two-sided CI: take the .95 quantile (upper tail)
z <- qnorm(.95)
# Incorporate CI into bar graph of means
sea_st %>%
mutate(promo = if_else(att_promo_effect == "Positive influence", "Positive", "None")) %>%
# Abbreviate the timing labels so they fit on the x axis
mutate(pref_time = case_when(att_most_pref_time == "Friday Evenings" ~ "Fri Evenings",
att_most_pref_time == "Saturday Afternoons 4pm" ~ "Sat Aft4pm",
att_most_pref_time == "Sunday Afternoons 4pm" ~ "Sun Aft4pm",
att_most_pref_time == "Saturday Afternoons 1pm" ~ "Sat Aft1pm",
att_most_pref_time == "Sunday Evenings" ~ "Sun Evenings")) %>%
group_by(pref_time, promo) %>%
# NOTE(review): sd is divided by 5 here and then by sqrt(n) again in the CI
# formula — confirm the /5 scaling is intentional and not a leftover hack.
summarise(m = mean(sales_numofmatch_2017), sd = sd(sales_numofmatch_2017)/5,
n = n(), ci = z * sd/sqrt(n)) %>%
ggplot(aes(x = reorder(pref_time, m), y = m, fill = reorder(promo,m))) +
geom_bar(stat = "identity", position = "dodge") +
geom_errorbar(aes(ymin = m - ci, ymax = m + ci),
width = 0.5, position = position_dodge(0.9)) +
# coord_flip() +
theme_classic() +
xlab("") +
labs(fill = "Promo Effect") +
theme(axis.text.x = element_text(angle = 30, hjust = 1)) +
theme(axis.text = element_text(face = "bold", size = 11),
axis.ticks.x = element_blank(),
axis.title.x = element_blank(),
axis.title = element_blank(),
legend.position = "top",
legend.title = element_text(face = "bold")) +
# NOTE(review): cbPalette is defined in a LATER chunk of this file — ensure
# it exists before this chunk runs.
scale_fill_manual(values=cbPalette) +
ggtitle("Mean number of Season Tickets likely to sell in Season 2017",
subtitle = "Promos are not effective on weekend matches")
# The palette with purples:
cbPalette <- c("#dddaed","#c5b4e3", "#9578d3", "#7d55c7")
# Cleaner version of the promo-effect chart: value labels instead of error
# bars, y-axis text suppressed. Stored as base_plot5 for the dashboard.
base_plot5 <-
  sea_st %>%
  mutate(promo = if_else(att_promo_effect == "Positive influence", "Positive", "None")) %>%
  # Abbreviate timing labels so they fit on the x axis
  mutate(pref_time = case_when(att_most_pref_time == "Friday Evenings" ~ "Fri Evenings",
                               att_most_pref_time == "Saturday Afternoons 4pm" ~ "Sat Aft4pm",
                               att_most_pref_time == "Sunday Afternoons 4pm" ~ "Sun Aft4pm",
                               att_most_pref_time == "Saturday Afternoons 1pm" ~ "Sat Aft1pm",
                               att_most_pref_time == "Sunday Evenings" ~ "Sun Evenings")) %>%
  group_by(pref_time, promo) %>%
  # NOTE(review): sd is divided by 5 and then by sqrt(n) in the CI — confirm
  # the /5 scaling is intentional (sd/ci are unused by this plot anyway).
  summarise(m = mean(sales_numofmatch_2017), sd = sd(sales_numofmatch_2017)/5,
            n = n(), ci = z * sd/sqrt(n)) %>%
  ggplot(aes(x = reorder(pref_time, m), y = m, fill = promo)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_text(aes(label = round(m, 1)), vjust = -0.5, color = "black",
            position = position_dodge(.9), size = 2.5) +
  # coord_flip() +
  theme_classic() +
  xlab("") +
  ylab("Mean") +
  labs(fill = "Promo Effect") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  theme(axis.text = element_text(face = "bold", size = 11),
        axis.ticks.x = element_blank(),
        axis.title.x = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        legend.position = "top",
        legend.title = element_text(face = "bold")) +
  scale_fill_manual(values=cbPalette) +
  ggtitle("Predicted Mean number of Season Tickets likely to sell in Season 2017",
          subtitle = "Promos are statistically effective on Sat afternoon only")
# FIX(review): save only the plot object — save.image() dumps EVERY object in
# the global environment into the .RData file. load() still restores the
# object under the name base_plot5.
save(base_plot5, file = "base_plot5.RData")
base_plot5
# Target-audience view: mean predicted 2017 match count by education level
# and preferred timing.
audience_subset <- c("College Graduate", "Graduate Degree","Some College",
                     "High School Graduate")
timings_subset <- c("Sunday Evenings",
                    "Sunday Afternoons 4pm", "Saturday Afternoons 4pm", "Friday Evenings", "Saturday Afternoons 1pm")
audience_new <- sea_st %>%
  filter(dem_education %in% audience_subset) %>%
  filter(att_most_pref_time %in% timings_subset)
base_plot6 <-
  audience_new %>%
  # NOTE(review): the final else branch lumps "High School Graduate" into
  # "College Graduate" — confirm that merge is intentional.
  mutate(audience = if_else(dem_education == "Some College", "Some College",
                            if_else(dem_education == "Graduate Degree", "Graduate Degree", "College Graduate"))) %>%
  group_by(att_most_pref_time, audience) %>%
  summarise(mean = mean(sales_numofmatch_2017), sd = sd(sales_numofmatch_2017),
            n = n(), ci = z * sd/sqrt(n)) %>%
  ggplot(aes(x = reorder(att_most_pref_time, mean), y = mean, fill = audience)) +
  geom_bar(stat = "identity", position = position_dodge2(preserve = "single", padding = 0)) +
  geom_text(aes(label = round(mean, 1)), vjust = -0.5, color = "black",
            position = position_dodge(.9), size = 2.5) +
  theme_classic() +
  xlab("") +
  ylab("Mean") +
  labs(fill = "Main Audience") +
  theme(axis.text.x = element_text(angle = 35, hjust = 1)) +
  theme(axis.text = element_text(face = "bold", size = 11),
        axis.ticks.x = element_blank(),
        axis.title.x = element_blank(),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        legend.position = "top",
        legend.title = element_text(face = "bold")) +
  scale_fill_manual(values=cbPalette) +
  # FIX(review): added missing space in "Afternoons 1pm" (user-facing text)
  ggtitle("Predicted mean of target audience, likely to attend Season 2017",
          subtitle = "Surprisingly, only College students are more likely\nto attend matches on Saturday Afternoons 1pm")
# FIX(review): save only the plot object — save.image() dumps the whole
# global environment into the file.
save(base_plot6, file = "base_plot6.RData")
base_plot6
# The palette with purples (re-defined here; identical to the earlier chunk):
cbPalette <- c("#dddaed","#c5b4e3", "#9578d3", "#7d55c7")
# Subset the data to the ticket types and timings used by the three
# ticket-type panels below.
ticket_subset <- c("5 Match Pack","Season Ticket","Single Match")
# NOTE(review): "Wed Evenings" is abbreviated while every other level is
# spelled out ("Friday Evenings", ...) — confirm it matches the actual level
# in the data, otherwise that timing is silently dropped by the filter.
timings_subset <- c("Wed Evenings", "Sunday Evenings","Friday Evenings",
"Sunday Afternoons 4pm", "Saturday Afternoons 4pm","Saturday Afternoons 1pm")
sea_sm1 <- sea_reign_data %>%
filter(att_most_pref_time %in% timings_subset) %>%
filter(sales_tkt_type_2017 %in% ticket_subset)
# Panel 1: total predicted 2017 matches for the "5 Match Pack" ticket type,
# by preferred timing.
# FIX(review): the original filtered on `sales_ticket_type` (the current
# ticket-type column) while the grouping and both sibling panels use
# `sales_tkt_type_2017`; filtering on the 2017 column matches the chart's
# stated intent — confirm against the data dictionary.
g5MatchPack <- sea_sm1 %>%
  filter(sales_tkt_type_2017 == "5 Match Pack") %>%
  group_by(sales_tkt_type_2017, att_most_pref_time) %>%
  summarise(m = sum(sales_numofmatch_2017), sd = sd(sales_numofmatch_2017),
            n = n(), ci = z * sd/sqrt(n)) %>%
  ggplot(aes(x = reorder(att_most_pref_time, m), y = m)) +
  geom_bar(stat = "identity", position = "dodge",
           fill = "#dddaed") +
  coord_flip() +
  # Shared 0-75 scale so the panels are visually comparable
  scale_y_continuous(limits = c(0, 75)) +
  theme_classic() +
  theme(axis.line.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.line.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.title = element_blank(),
        axis.text.y = element_text(size = 9),
        axis.text.x = element_blank()) +
  ggtitle("5 Match Pack") +
  theme(plot.title = element_text(hjust = -0.22,
                                  vjust = -0.30,
                                  colour = "black",
                                  face = "bold",
                                  size = 11),
        plot.margin = unit(c(0.2,0.2,0.2,0.2), "cm"))
# Panel 2: total predicted 2017 matches for "Single Match" tickets by
# preferred timing, styled to stack under the 5 Match Pack panel.
gSingle <- sea_sm1 %>%
filter(sales_tkt_type_2017 == "Single Match") %>%
# NOTE(review): Sunday Evenings and Sunday Afternoons 4pm are excluded here
# but not in the sibling panels — presumably no/negligible single-match data
# for those timings; confirm.
filter(att_most_pref_time != "Sunday Evenings") %>%
filter(att_most_pref_time != "Sunday Afternoons 4pm") %>%
group_by(sales_tkt_type_2017, att_most_pref_time) %>%
summarise(m = sum(sales_numofmatch_2017), sd = sd(sales_numofmatch_2017),
n = n(), ci = z * sd/sqrt(n)) %>%
ggplot(aes(x = reorder(att_most_pref_time, m), y = m)) +
geom_bar(stat = "identity", position = "dodge",
fill = "#c5b4e3") +
coord_flip() +
# Same 0-75 scale as the panel above, for visual comparability
scale_y_continuous(limits = c(0, 75)) +
theme_classic() +
theme(axis.line.y = element_blank(),
axis.ticks.y = element_blank(),
axis.line.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title = element_blank(),
axis.text.y = element_text(size = 9),
axis.text.x = element_blank()) +
ggtitle("Single Match") +
theme(plot.title = element_text(hjust = -0.22,
vjust = -0.30,
colour = "black",
face = "bold",
size = 11),
plot.margin = unit(c(0.2,0.2,0.2,0.2), "cm"))
# Panel 3 (bottom): total predicted 2017 matches for "Season Ticket" by
# preferred timing; this panel keeps the x-axis line and labels for the
# whole stacked figure.
gSeason <- sea_sm1 %>%
filter(sales_tkt_type_2017 == "Season Ticket") %>%
group_by(sales_tkt_type_2017, att_most_pref_time) %>%
summarise(m = sum(sales_numofmatch_2017), sd = sd(sales_numofmatch_2017),
n = n(), ci = z * sd/sqrt(n)) %>%
ggplot(aes(x = reorder(att_most_pref_time, m), y = m)) +
geom_bar(stat = "identity", position = "dodge", fill = "#9578d3") +
coord_flip() +
# NOTE(review): unlike the two panels above, no 0-75 y-limit is set here —
# if Season Ticket totals exceed 75 the panels are not on a shared scale;
# confirm this is intentional.
theme_classic() +
theme(axis.line.y = element_blank(),
axis.title = element_blank(),
axis.ticks.y = element_blank(),
axis.line.x = element_line(colour = "grey"),
axis.text = element_text(size = 9),
legend.position = "bottom") +
ggtitle("Season Ticket") +
theme(plot.title = element_text(hjust = -0.22,
vjust = 0.30,
colour = "black",
face = "bold",
size = 11),
plot.margin = unit(c(0.2,0.2,0.2,0.2), "cm"))
# Stack the three ticket-type panels into a single aligned figure.
base_plot4 <- cowplot::plot_grid(g5MatchPack, gSingle, gSeason,
                                 align = "v", nrow = 3,
                                 rel_heights = c(0.35, 0.35, 0.5),
                                 labels = "Predicted Ticket Sales Season 2017",
                                 label_size = 12,
                                 hjust = -0.9,
                                 vjust = 1.7)
# FIX(review): save only the plot object — save.image() dumps the entire
# global environment into the .RData file.
save(base_plot4, file = "base_plot4.RData")
base_plot4
Possible recommendations for Seattle Reign FC to increase ticket sales and attendance:
Visitors' purchasing habits show they are most likely to buy merchandise from stores.
Seattle Reign FC can focus on selling Short Sleeve Shirts during matches, because they are the best-selling merchandise item across all sponsors.
Since only College Graduates are likely to attend matches on Saturday Afternoons at 1pm, it is important to find out why College and Graduate students are not attending — for example, is the stadium too far from the city to attend with family?
Check the number of observations in each buying-preference / sponsor pair
# Cross-tabulate merchandise buying preference against sponsor purchased
# from, with row/column totals.
# Number of observations helps us determine if stats will be valid
sea_reign_data %>%
tabyl(mer_buy_pref, mer_purchase_sponsors) %>%
adorn_totals(where = c("row", "col"))
mer_buy_pref BECU Carter Subaru Kraken Congee Microsoft Not sure Pepsi Ruffneck Scarves Total
At Store 0 1 0 9 8 8 0 26
During Match 15 6 5 41 21 23 5 116
Neutral 15 6 3 43 44 17 2 130
Online 15 7 0 61 24 32 3 142
Total 45 20 8 154 97 80 10 414
Most cells have enough observations for valid statistics, but several are sparse (e.g. At Store x BECU = 0, Online x Kraken Congee = 0), so the chi-squared approximation below may be unreliable — note the warning it produces.
# Are buying preference and sponsor independent? (chi-squared test of
# independence on the cross-tab above)
# (The original comment asked "Are Category and Region sales similar?" —
# a copy-paste leftover from a different assignment.)
# Not tidyverse so have to use base code
chisq.test(table(sea_reign_data$mer_buy_pref, sea_reign_data$mer_purchase_sponsors))
Chi-squared approximation may be incorrect
Pearson's Chi-squared test
data: table(sea_reign_data$mer_buy_pref, sea_reign_data$mer_purchase_sponsors)
X-squared = 30.777, df = 18, p-value = 0.03054
# Is the correlation between spend on others and spend on self (2016)
# statistically significant?
# Not tidyverse so have to use base code
cor.test(sea_reign_data$sales_exp_oth2016, sea_reign_data$sales_exp_self2016)
Pearson's product-moment correlation
data: sea_reign_data$sales_exp_oth2016 and sea_reign_data$sales_exp_self2016
t = 2.3786, df = 412, p-value = 0.01783
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.02023787 0.21040629
sample estimates:
cor
0.1163887
# Table of counts for each buying-preference x sponsor combination
P_B_n <- sea_reign_data %>%
  group_by(mer_buy_pref, mer_purchase_sponsors) %>%
  summarise(n = n())
# Calculate simultaneous confidence intervals using multinomialCI;
# t() turns the count column into the row vector the function expects.
# alpha = 0.05 indicates 95% confidence level.
P_B_n_ci <- multinomialCI(t(P_B_n[, 3]), 0.05)
# Table with each cell's proportion of the total sample.
# FIX(review): sum(nrow(x)) was redundant — nrow() is already a scalar.
P_B_table <- sea_reign_data %>%
  group_by(mer_buy_pref, mer_purchase_sponsors) %>%
  summarise(prop = n()/nrow(sea_reign_data))
# Attach the confidence intervals to the proportion table. This relies on
# P_B_n and P_B_table having the same row order, which holds because both
# use the identical group_by().
P_B_table$ci_l <- P_B_n_ci[,1]
P_B_table$ci_u <- P_B_n_ci[,2]
# Show the table
(P_B_table)
# NOTE(review): this chunk is an exact duplicate of the promo-effect chart
# earlier in the file — consider removing one copy.
# Graph mean predicted 2017 ticket counts by preferred timing, split by promo
# influence. (The "General Mill branded cereals" wording was a copy-paste
# leftover from another assignment.)
# z multiplier for a 90% two-sided CI: take the .95 quantile (upper tail)
z <- qnorm(.95)
# Incorporate CI into bar graph of means
sea_st %>%
mutate(promo = if_else(att_promo_effect == "Positive influence", "Positive", "None")) %>%
mutate(pref_time = case_when(att_most_pref_time == "Friday Evenings" ~ "Fri Evenings",
att_most_pref_time == "Saturday Afternoons 4pm" ~ "Sat Aft4pm",
att_most_pref_time == "Sunday Afternoons 4pm" ~ "Sun Aft4pm",
att_most_pref_time == "Saturday Afternoons 1pm" ~ "Sat Aft1pm",
att_most_pref_time == "Sunday Evenings" ~ "Sun Evenings")) %>%
group_by(pref_time, promo) %>%
# NOTE(review): sd/5 followed by /sqrt(n) in the CI — confirm intentional.
summarise(m = mean(sales_numofmatch_2017), sd = sd(sales_numofmatch_2017)/5,
n = n(), ci = z * sd/sqrt(n)) %>%
ggplot(aes(x = reorder(pref_time, m), y = m, fill = reorder(promo,m))) +
geom_bar(stat = "identity", position = "dodge") +
geom_errorbar(aes(ymin = m - ci, ymax = m + ci),
width = 0.5, position = position_dodge(0.9)) +
# coord_flip() +
theme_classic() +
xlab("") +
labs(fill = "Promo Effect") +
theme(axis.text.x = element_text(angle = 30, hjust = 1)) +
theme(axis.text = element_text(face = "bold", size = 11),
axis.ticks.x = element_blank(),
axis.title.x = element_blank(),
axis.title = element_blank(),
legend.position = "top",
legend.title = element_text(face = "bold")) +
scale_fill_manual(values=cbPalette) +
ggtitle("Mean number of Season Tickets likely to sell in Season 2017",
subtitle = "Promos are not effective on weekend matches")
Interpretations
Cleaner graph
# NOTE(review): this chunk duplicates the base_plot5 construction from
# earlier in the file (minus the save step) — consider removing one copy.
# The palette with purples:
cbPalette <- c("#dddaed","#c5b4e3", "#9578d3", "#7d55c7")
# Cleaner promo-effect chart: value labels instead of error bars.
base_plot5 <-
sea_st %>%
mutate(promo = if_else(att_promo_effect == "Positive influence", "Positive", "None")) %>%
mutate(pref_time = case_when(att_most_pref_time == "Friday Evenings" ~ "Fri Evenings",
att_most_pref_time == "Saturday Afternoons 4pm" ~ "Sat Aft4pm",
att_most_pref_time == "Sunday Afternoons 4pm" ~ "Sun Aft4pm",
att_most_pref_time == "Saturday Afternoons 1pm" ~ "Sat Aft1pm",
att_most_pref_time == "Sunday Evenings" ~ "Sun Evenings")) %>%
group_by(pref_time, promo) %>%
summarise(m = mean(sales_numofmatch_2017), sd = sd(sales_numofmatch_2017)/5,
n = n(), ci = z * sd/sqrt(n)) %>%
ggplot(aes(x = reorder(pref_time, m), y = m, fill = promo)) +
geom_bar(stat = "identity", position = "dodge") +
geom_text(aes(label = round(m, 1)), vjust = -0.5, color = "black",
position = position_dodge(.9), size = 2.5) +
# coord_flip() +
theme_classic() +
xlab("") +
ylab("Mean") +
labs(fill = "Promo Effect") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
theme(axis.text = element_text(face = "bold", size = 11),
axis.ticks.x = element_blank(),
axis.title.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
legend.position = "top",
legend.title = element_text(face = "bold")) +
scale_fill_manual(values=cbPalette) +
ggtitle("Predicted Mean number of Season Tickets likely to sell in Season 2017",
subtitle = "Promos are statistically effective on Sat afternoon only")
select(factor1_attend,att_first_attend,att_interest,att_gamewith,att_travel_mode,att_distance, att_promo_effect,att_player_fan,att_seaEvent, att_most_pref_time, att_least_preferred_timing,att_attend_2017,att_attend_2016, att_gamewith, sales_ticket_type, sales_exp_self2016, sales_exp_oth2016,sales_numofmatch_2017,sales_tkt_type_2017,media_web, media_season_newspaper,media_season_tv, mer_buy_pref,cost, mer_purchase_sponsors,mer_reign,dem_gender,dem_sexualOrient,dem_marital_status, dem_education,sat_gameday,satisfacn_perf) %>% drop_na()
# Linear regression: model 2016 self-reported spend as a function of the
# predicted 2017 match count, 2017 ticket type, merchandise preferences and
# education.
# NOTE(review): the original comment described an interaction term
# (units x promo, ~20% correlated) but the formula contains no interaction —
# leftover text from a different assignment.
base <- lm(sales_exp_self2016 ~ sales_numofmatch_2017 + sales_tkt_type_2017 + mer_buy_pref + mer_purchase_sponsors + mer_reign
+ dem_education,
data = sea_reign_data)
# Review output
summary(base)
# plot residuals to check for patterns
# NOTE(review): mfrow = c(1, 1) shows one plot at a time; use c(1, 3) if the
# three residual plots below are meant to display together.
par(mfrow = c(1, 1))
plot(sea_reign_data$sales_exp_self2016, base$residuals)
plot(sea_reign_data$sales_numofmatch_2017, base$residuals)
# att_distance is not a model term; this checks for a missed predictor
plot(sea_reign_data$att_distance, base$residuals)
How easy is it to see from this output which variables are statistically significant?
# Collect coefficient estimates, their test statistics, and 95% confidence
# intervals into one data frame for the table and dot plots below.
coe <- summary(base)$coefficients
# Bind the CI columns on first, then drop the intercept row in one step
coe_CI <- as.data.frame(cbind(coe, confint(base))[-1, ])
# Friendlier column names for display
names(coe_CI) <- c("estimate", "se", "t", "pval","low_CI","high_CI")
htmlTable(round(coe_CI,3))
| estimate | se | t | pval | low_CI | high_CI | |
|---|---|---|---|---|---|---|
| sales_numofmatch_2017 | 1.946 | 1.147 | 1.696 | 0.091 | -0.309 | 4.202 |
| sales_tkt_type_20175 Match Pack | 24.624 | 14.586 | 1.688 | 0.092 | -4.053 | 53.301 |
| sales_tkt_type_2017Pitchside Table | 5.365 | 28.469 | 0.188 | 0.851 | -50.606 | 61.336 |
| sales_tkt_type_2017Season Ticket | 21.969 | 14.166 | 1.551 | 0.122 | -5.881 | 49.82 |
| sales_tkt_type_2017Single Match | -9.222 | 11.596 | -0.795 | 0.427 | -32.021 | 13.578 |
| sales_tkt_type_2017Undecided | 1.938 | 13.772 | 0.141 | 0.888 | -25.139 | 29.015 |
| mer_buy_prefDuring Match | -2.554 | 11.237 | -0.227 | 0.82 | -24.647 | 19.539 |
| mer_buy_prefNeutral | -18.256 | 11.068 | -1.65 | 0.1 | -40.016 | 3.503 |
| mer_buy_prefOnline | -5.186 | 11.024 | -0.47 | 0.638 | -26.86 | 16.488 |
| mer_purchase_sponsorsCarter Subaru | 8.206 | 13.598 | 0.603 | 0.547 | -18.529 | 34.942 |
| mer_purchase_sponsorsKraken Congee | -2.719 | 19.437 | -0.14 | 0.889 | -40.934 | 35.496 |
| mer_purchase_sponsorsMicrosoft | 8.176 | 8.696 | 0.94 | 0.348 | -8.921 | 25.273 |
| mer_purchase_sponsorsNot sure | -10.841 | 9.615 | -1.127 | 0.26 | -29.745 | 8.063 |
| mer_purchase_sponsorsPepsi | 10.31 | 9.539 | 1.081 | 0.28 | -8.444 | 29.063 |
| mer_purchase_sponsorsRuffneck Scarves | 44.222 | 17.605 | 2.512 | 0.012 | 9.608 | 78.835 |
| mer_reignJackets | 2.313 | 16.526 | 0.14 | 0.889 | -30.179 | 34.805 |
| mer_reignLong_Sleeve_Shirt | 17.816 | 13.129 | 1.357 | 0.176 | -7.997 | 43.629 |
| mer_reignNo preferences | 32.597 | 12.655 | 2.576 | 0.01 | 7.717 | 57.477 |
| mer_reignShort_Sleeve_Shirt | 26.169 | 11.207 | 2.335 | 0.02 | 4.135 | 48.202 |
| dem_educationGraduate Degree | -0.707 | 5.456 | -0.13 | 0.897 | -11.435 | 10.021 |
| dem_educationHigh School Graduate | 27.008 | 19.717 | 1.37 | 0.172 | -11.757 | 65.773 |
| dem_educationOther | 57.663 | 35.918 | 1.605 | 0.109 | -12.954 | 128.281 |
| dem_educationSome College | 11.184 | 8.441 | 1.325 | 0.186 | -5.411 | 27.779 |
# Same table, re-sorted so the most statistically significant rows
# (smallest p-values) come first; ascending is order()'s default.
htmlTable(round(coe_CI[order(coe_CI$pval), ], 3))
| estimate | se | t | pval | low_CI | high_CI | |
|---|---|---|---|---|---|---|
| mer_reignNo preferences | 32.597 | 12.655 | 2.576 | 0.01 | 7.717 | 57.477 |
| mer_purchase_sponsorsRuffneck Scarves | 44.222 | 17.605 | 2.512 | 0.012 | 9.608 | 78.835 |
| mer_reignShort_Sleeve_Shirt | 26.169 | 11.207 | 2.335 | 0.02 | 4.135 | 48.202 |
| sales_numofmatch_2017 | 1.946 | 1.147 | 1.696 | 0.091 | -0.309 | 4.202 |
| sales_tkt_type_20175 Match Pack | 24.624 | 14.586 | 1.688 | 0.092 | -4.053 | 53.301 |
| mer_buy_prefNeutral | -18.256 | 11.068 | -1.65 | 0.1 | -40.016 | 3.503 |
| dem_educationOther | 57.663 | 35.918 | 1.605 | 0.109 | -12.954 | 128.281 |
| sales_tkt_type_2017Season Ticket | 21.969 | 14.166 | 1.551 | 0.122 | -5.881 | 49.82 |
| dem_educationHigh School Graduate | 27.008 | 19.717 | 1.37 | 0.172 | -11.757 | 65.773 |
| mer_reignLong_Sleeve_Shirt | 17.816 | 13.129 | 1.357 | 0.176 | -7.997 | 43.629 |
| dem_educationSome College | 11.184 | 8.441 | 1.325 | 0.186 | -5.411 | 27.779 |
| mer_purchase_sponsorsNot sure | -10.841 | 9.615 | -1.127 | 0.26 | -29.745 | 8.063 |
| mer_purchase_sponsorsPepsi | 10.31 | 9.539 | 1.081 | 0.28 | -8.444 | 29.063 |
| mer_purchase_sponsorsMicrosoft | 8.176 | 8.696 | 0.94 | 0.348 | -8.921 | 25.273 |
| sales_tkt_type_2017Single Match | -9.222 | 11.596 | -0.795 | 0.427 | -32.021 | 13.578 |
| mer_purchase_sponsorsCarter Subaru | 8.206 | 13.598 | 0.603 | 0.547 | -18.529 | 34.942 |
| mer_buy_prefOnline | -5.186 | 11.024 | -0.47 | 0.638 | -26.86 | 16.488 |
| mer_buy_prefDuring Match | -2.554 | 11.237 | -0.227 | 0.82 | -24.647 | 19.539 |
| sales_tkt_type_2017Pitchside Table | 5.365 | 28.469 | 0.188 | 0.851 | -50.606 | 61.336 |
| sales_tkt_type_2017Undecided | 1.938 | 13.772 | 0.141 | 0.888 | -25.139 | 29.015 |
| mer_reignJackets | 2.313 | 16.526 | 0.14 | 0.889 | -30.179 | 34.805 |
| mer_purchase_sponsorsKraken Congee | -2.719 | 19.437 | -0.14 | 0.889 | -40.934 | 35.496 |
| dem_educationGraduate Degree | -0.707 | 5.456 | -0.13 | 0.897 | -11.435 | 10.021 |
# Cleveland dot plot of the regression coefficients, x-range wide enough to
# later accommodate the confidence-interval segments.
ggplot(coe_CI, aes(x = estimate, y = row.names(coe_CI))) +
  geom_point(size = 3) +
  xlim(min(coe_CI$low_CI), max(coe_CI$high_CI)) +
  labs(x = "Coefficient", y = "Variable") +
  theme_bw()
The next plot orders the variables by statistical significance (smallest p-value at the top).
# Same dot plot, with variables ordered so the most significant
# (smallest p-value) appears at the top of the y axis.
(g1 <- coe_CI %>%
   ggplot(aes(x = estimate,
              y = reorder(row.names(coe_CI), desc(pval)))) +
   geom_point(size = 3) +
   xlim(min(coe_CI$low_CI), max(coe_CI$high_CI)) +
   labs(x = "Coefficient", y = "Variable") +
   theme_bw())
NA
# Draw horizontal segments from each estimate out to its CI bounds.
# FIX(review): the two geom_segment calls mixed `desc(pval)` and
# `desc(coe_CI$pval)` — same ordering, now written consistently.
(g2 <- g1 +
   geom_segment(aes(yend = reorder(row.names(coe_CI), desc(pval))),
                xend = coe_CI$high_CI, color = "Blue") +
   geom_segment(aes(yend = reorder(row.names(coe_CI), desc(pval))),
                xend = coe_CI$low_CI, color = "Blue") +
   xlab("Coefficient with Confidence Interval")
)
# A vertical reference line at zero makes it easy to spot coefficients
# whose confidence interval excludes no-effect.
(g3 <- g2 + geom_vline(xintercept = 0, colour = "red"))